Source code for hyeenna.estimators

import numpy as np
from scipy.special import psi
from sklearn.neighbors import NearestNeighbors

K = 5
METRIC = 'chebyshev'
EPS = 1e-10


def nearest_distances_vec(X: np.array, Y: np.array=None, k: int=K,
                          metric=METRIC) -> np.array:
    """Find vector distances to all k nearest neighbors"""
    knn = NearestNeighbors(n_neighbors=k, metric=metric)
    knn.fit(X)
    if Y is not None:
        _, l = knn.kneighbors(Y)
        dvec = np.array([X[i] - Y[ll[1:]] for i, ll in enumerate(l)])
    else:
        _, l = knn.kneighbors(X)
        dvec = np.array([X[i] - X[ll[1:]] for i, ll in enumerate(l)])
    return dvec


def nearest_distances(X: np.array, Y: np.array=None, k: int=K,
                      metric=METRIC) -> list:
    """Distance to the kth nearest neighbor"""
    knn = NearestNeighbors(n_neighbors=k, metric=metric)
    knn.fit(X)
    if Y is not None:
        d, _ = knn.kneighbors(Y)
    else:
        d, _ = knn.kneighbors(X)
    return d[:, -1]


def marginal_neighbors(X: np.array, R: np.array, metric=METRIC) -> list:
    """Number of neighbors within a given radius of each point"""
    knn = NearestNeighbors(metric=metric)
    knn.fit(X)
    return np.array([len(knn.radius_neighbors(p.reshape(1, -1), r)[0][0])
                     for p, r in zip(X, R)])


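# Illustrative sketch (not part of the hyeenna API): the neighbor-search
# helpers above are the building blocks of the estimators below. Passing
# k=K + 1 means each query point is returned as its own nearest neighbor and
# is effectively discarded, which is why the estimators call them that way.
# The helper name, seed, and sample size here are arbitrary choices.
def _demo_nearest_distances(n_samples: int = 200) -> np.array:
    rng = np.random.default_rng(9)
    pts = rng.normal(size=(n_samples, 2))
    # Chebyshev distance from each point to its Kth genuine neighbor.
    return nearest_distances(pts, k=K + 1)

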
def entropy(X: np.array, k: int=K) -> float:
    r"""
    Computes the Shannon entropy of a random variable X using the
    Kozachenko-Leonenko (KL) nearest neighbor estimator. The formula is
    given by:

    $$
    \hat{H}(X) = \psi(N) - \psi(k) + \log(C_d)
                 + d \langle \log(\epsilon) \rangle
    $$

    where

     - $N$ is the number of samples
     - $k$ is the number of neighbors
     - $\psi$ is the digamma function
     - $\langle \cdot \rangle$ is the mean
     - $\epsilon_i$ is twice the distance to the $k^{th}$ nearest neighbor
     - $C_d$ is the volume of the $d$-dimensional unit ball
       (equal to 1 for the Chebyshev metric)

    Parameters
    ----------
    X: np.array
        Sample from a random variable
    k: int, optional
        Number of neighbors to use in estimation

    Returns
    -------
    ent: float
        estimated entropy

    References
    ----------
    .. [0] Goria, M. N., Leonenko, N. N., Mergel, V. V., & Inverardi, P. L. N.
           (2005). A new class of random vector entropy estimators and its
           applications in testing statistical hypotheses. Journal of
           Nonparametric Statistics, 17(3), 277–297.
           https://doi.org/10.1080/104852504200026815
    """
    if not isinstance(X, np.ndarray):
        raise TypeError('Data must be given as a numpy array!')
    if len(X.shape) == 1:
        X = X.reshape(-1, 1)
    n, d = X.shape
    # Small random jitter breaks ties between identical samples.
    e = 2 * nearest_distances(X, k=k+1) + EPS * np.random.random(size=n)
    # log(C_d) = log(1) = 0 for the Chebyshev metric.
    ent = d * np.mean(np.log(e)) + psi(n) - psi(k) + np.log(1)
    return ent


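# Illustrative sketch (not part of the hyeenna API): for a univariate Gaussian
# with standard deviation sigma the analytic entropy is
# 0.5 * log(2 * pi * e * sigma**2), so the estimate below should land near
# 0.5 * log(2 * pi * e) ~= 1.419 nats. The helper name, seed, and sample size
# are arbitrary choices for illustration.
def _demo_entropy_gaussian(n_samples: int = 5000) -> float:
    rng = np.random.default_rng(0)
    x = rng.normal(loc=0.0, scale=1.0, size=n_samples)
    return entropy(x, k=K)

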
def conditional_entropy(X: np.array, Y: np.array, k: int=K) -> float:
    """
    Computes the conditional Shannon entropy of a sample of a random
    variable X given another sample of a random variable Y, using an
    adaptation of the KL and KSG estimators.

    Parameters
    ----------
    X: np.array
        Sample from a random variable
    Y: np.array
        Sample from a random variable
    k: int, optional
        Number of neighbors to use in estimation

    Returns
    -------
    cent: float
        estimated conditional entropy

    References
    ----------
    .. [0] Goria, M. N., Leonenko, N. N., Mergel, V. V., & Inverardi, P. L. N.
           (2005). A new class of random vector entropy estimators and its
           applications in testing statistical hypotheses. Journal of
           Nonparametric Statistics, 17(3), 277–297.
    .. [1] Kraskov, A., Stögbauer, H., & Grassberger, P. (2004). Estimating
           mutual information. Physical Review E, 69(6), 066138.
           https://doi.org/10.1103/PhysRevE.69.066138
    """
    if len(X.shape) == 1:
        X = X.reshape(-1, 1)
    if len(Y.shape) == 1:
        Y = Y.reshape(-1, 1)
    n, d = X.shape
    assert X.shape[0] == Y.shape[0], "{} - {}".format(X.shape, Y.shape)
    # Distance to the kth neighbor in the joint space, with a small jitter
    # to break ties between identical samples.
    r = (nearest_distances(np.hstack([X, Y]), k=k+1)
         - EPS * np.random.random(size=n))
    n_y = marginal_neighbors(Y, r)
    return d * np.mean(np.log(2*r)) + np.mean(psi(n_y)) - psi(k)


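# Illustrative sketch (not part of the hyeenna API): when X and Y are
# independent, conditioning on Y provides no information, so H(X|Y) should be
# close to the marginal entropy H(X). The helper name, seed, and sample size
# are arbitrary choices for illustration.
def _demo_conditional_entropy_independent(n_samples: int = 5000) -> tuple:
    rng = np.random.default_rng(1)
    x = rng.normal(size=n_samples)
    y = rng.normal(size=n_samples)
    # The two returned values should be close to each other.
    return conditional_entropy(x, y, k=K), entropy(x, k=K)

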
def mutual_info(X: np.array, Y: np.array, k: int=K) -> float:
    r"""
    Computes the mutual information of two random variables, X and Y, using
    the KSG nearest neighbor estimator. The formula is given by:

    $$
    \hat{I}(X,Y) = \psi(N) + \psi(k) - \frac{1}{k}
                   - \langle \psi(n_X + 1) + \psi(n_Y + 1) \rangle
    $$

    where

     - $N$ is the number of samples
     - $k$ is the number of neighbors
     - $\psi$ is the digamma function
     - $\langle \cdot \rangle$ is the mean
     - $n_i$ is the number of points within the distance to the $k^{th}$
       nearest neighbor when projected into the subspace spanned by $i$

    Parameters
    ----------
    X: np.array
        A sample from a random variable
    Y: np.array
        A sample from a random variable
    k: int, optional
        Number of neighbors to use in estimation.

    Returns
    -------
    mi: float
        The mutual information

    References
    ----------
    .. [0] Kraskov, A., Stögbauer, H., & Grassberger, P. (2004). Estimating
           mutual information. Physical Review E, 69(6), 066138.
           https://doi.org/10.1103/PhysRevE.69.066138
    """
    if len(X.shape) == 1:
        X = X.reshape(-1, 1)
    if len(Y.shape) == 1:
        Y = Y.reshape(-1, 1)
    n, d = X.shape
    assert X.shape[0] == Y.shape[0], "{} - {}".format(X.shape, Y.shape)
    # Distance to the kth neighbor in the joint space, with a small jitter
    # to break ties between identical samples.
    r = (nearest_distances(np.hstack([X, Y]), k=k+1)
         - EPS * np.random.random(size=n))
    n_x = marginal_neighbors(X, r)
    n_y = marginal_neighbors(Y, r)
    return psi(n) + psi(k) - (1./k) - np.mean(psi(n_x+1) + psi(n_y+1))


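# Illustrative sketch (not part of the hyeenna API): for a bivariate Gaussian
# with correlation rho the true mutual information is -0.5 * log(1 - rho**2),
# which gives a convenient check on the estimator. The helper name, seed, and
# sample size are arbitrary choices for illustration.
def _demo_mutual_info_correlated(n_samples: int = 5000,
                                 rho: float = 0.8) -> float:
    rng = np.random.default_rng(2)
    cov = np.array([[1.0, rho], [rho, 1.0]])
    xy = rng.multivariate_normal(mean=[0.0, 0.0], cov=cov, size=n_samples)
    # Analytic value: -0.5 * np.log(1 - rho**2) ~= 0.511 nats for rho = 0.8.
    return mutual_info(xy[:, 0], xy[:, 1], k=K)

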
def mi_local_nonuniformity_correction(X, *args, k: int=K, alpha=1.05,
                                      **kwargs):
    """
    Compute the local nonuniformity correction (LNC) factor for strongly
    dependent variables. The correction is calculated from the structure of
    the space of k-nearest neighbors: the volume of the max-norm
    hyper-rectangle bounding the k nearest neighbors is compared to the
    volume of the hyper-rectangle bounding those neighbors in the frame of
    the principal components of their covariance matrix.

    Parameters
    ----------
    X: np.array
        A sample from a random variable
    *args: List[np.array]
        Samples from random variables
    k: int, optional
        Number of neighbors to use in estimation.
    alpha: float, optional
        Sensitivity parameter for filtering non-dependent volumes
    **kwargs: np.array
        Samples from random variables

    Returns
    -------
    lnc: float
        The correction factor to be subtracted from the mutual information

    References
    ----------
    .. [0] Gao, S., Steeg, G. V., & Galstyan, A. (2014). Efficient Estimation
           of Mutual Information for Strongly Dependent Variables.
           https://arxiv.org/abs/1411.2003v3
    """
    data = [X, *args, *kwargs.values()]
    for i, d in enumerate(data):
        if len(d.shape) == 1:
            data[i] = d.reshape(-1, 1)
    assert data[0].shape == data[-1].shape
    n, d = data[0].shape
    # Vectors from each sample to its k nearest neighbors in the joint space,
    # with a small jitter to break ties between identical samples.
    M = (nearest_distances_vec(np.hstack(data), k=k+1)
         + EPS * np.random.random(size=(n, k, d)))

    # Compute volume of the axis-aligned hyper-rectangle bounding the
    # neighbors of each point
    V = np.prod(np.max(np.abs(M), axis=1), axis=1)

    # Compute volume of the hyper-rectangle bounding the neighbors in the
    # frame of the principal components of their covariance matrix
    C = np.array([np.cov(M[i].T) for i in range(M.shape[0])])
    eigvals, eigvecs = np.linalg.eig(C)
    projected = np.abs([np.inner(eigvecs[i], M[i]) for i in range(M.shape[0])])
    V_bar = np.prod(np.max(projected, axis=-1), axis=-1)

    # Compute correction factor, zeroing it for points whose neighborhoods do
    # not look locally nonuniform
    lnc = np.log(V_bar / V)
    mask = (V_bar / V) >= alpha
    lnc[mask] = 0
    return np.mean(lnc)


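# Illustrative sketch (not part of the hyeenna API): the correction is meant
# to be subtracted from the KSG mutual information estimate when the variables
# are strongly dependent, e.g. a nearly deterministic relationship with little
# noise. The helper name, seed, noise level, and sample size are arbitrary
# choices for illustration.
def _demo_lnc_corrected_mi(n_samples: int = 2000,
                           noise: float = 1e-3) -> float:
    rng = np.random.default_rng(3)
    x = rng.normal(size=n_samples)
    y = x + noise * rng.normal(size=n_samples)  # strongly dependent pair
    return (mutual_info(x, y, k=K)
            - mi_local_nonuniformity_correction(x, y, k=K))

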
def multi_mutual_info(X: np.array, *args, k: int=K, **kwargs) -> float:
    r"""
    Computes the multivariate mutual information of several random variables
    using the KSG nearest neighbor estimator. The formula is given by:

    $$
    \hat{I}(X_1, ..., X_m) = (m-1)\cdot\psi(N) + \psi(k) - \frac{m-1}{k}
                             - \langle \psi(n_{X_1} + 1) + ...
                             + \psi(n_{X_m} + 1) \rangle
    $$

    where

     - $N$ is the number of samples
     - $m$ is the number of variables
     - $k$ is the number of neighbors
     - $\psi$ is the digamma function
     - $\langle \cdot \rangle$ is the mean
     - $n_i$ is the number of points within the distance to the $k^{th}$
       nearest neighbor when projected into the subspace spanned by $i$

    Parameters
    ----------
    X: np.array
        A sample from a random variable
    *args: List[np.array]
        Samples from random variables
    k: int, optional
        Number of neighbors to use in estimation.
    **kwargs: np.array
        Samples from random variables

    Returns
    -------
    mi: float
        The mutual information

    References
    ----------
    .. [0] Kraskov, A., Stögbauer, H., & Grassberger, P. (2004). Estimating
           mutual information. Physical Review E, 69(6), 066138.
           https://doi.org/10.1103/PhysRevE.69.066138
    """
    data = [X, *args, *kwargs.values()]
    for i, d in enumerate(data):
        if len(d.shape) == 1:
            data[i] = d.reshape(-1, 1)
    assert data[0].shape == data[-1].shape
    n, d = data[0].shape
    # Distance to the kth neighbor in the joint space, with a small jitter
    # to break ties between identical samples.
    r = (nearest_distances(np.hstack(data), k=k+1)
         - EPS * np.random.random(size=n))
    n_i = [marginal_neighbors(d, r) for d in data]
    return (psi(k) - (len(data)-1)/k + (len(data)-1) * psi(n)
            - np.mean(np.sum([psi(ni+1) for ni in n_i], axis=0)))


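# Illustrative sketch (not part of the hyeenna API): with Z = X + Y plus a
# little noise the three variables share substantial information, so the
# estimate should be clearly positive, while three independent variables
# should give a value near zero. The helper name, seed, noise level, and
# sample size are arbitrary choices for illustration.
def _demo_multi_mutual_info(n_samples: int = 2000) -> float:
    rng = np.random.default_rng(4)
    x = rng.normal(size=n_samples)
    y = rng.normal(size=n_samples)
    z = x + y + 0.1 * rng.normal(size=n_samples)
    return multi_mutual_info(x, y, z, k=K)

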
def conditional_mutual_info(X: np.array, Y: np.array, Z: np.array,
                            k: int=K) -> float:
    """
    Compute the conditional mutual information I(X; Y | Z).

    Parameters
    ----------
    X: np.array
        Sample from random variable X
    Y: np.array
        Sample from random variable Y
    Z: np.array
        Sample from random variable Z
    k: int, optional
        Number of neighbors to use in estimation

    Returns
    -------
    cmi: float
        estimated conditional mutual information

    References
    ----------
    .. [0] Vlachos, I., & Kugiumtzis, D. (2010). Non-uniform state space
           reconstruction and coupling detection. Physical Review E, 82,
           016207. https://doi.org/10.1103/PhysRevE.82.016207
    """
    if len(X.shape) == 1:
        X = X.reshape(-1, 1)
    if len(Y.shape) == 1:
        Y = Y.reshape(-1, 1)
    if len(Z.shape) == 1:
        Z = Z.reshape(-1, 1)
    nX, dX = X.shape
    nY, dY = Y.shape
    nZ, dZ = Z.shape
    assert nX == nY, "{} - {}".format(X.shape, Y.shape)
    assert nZ == nY, "{} - {}".format(Z.shape, Y.shape)

    xz = np.hstack([X, Z])
    yz = np.hstack([Y, Z])
    z = np.hstack([Z])
    xyz = np.hstack([X, Y, Z])
    # Distance to the kth neighbor in the full joint space, with a small
    # jitter to break ties between identical samples.
    r = nearest_distances(xyz, k=k+1) + EPS * np.random.random(size=nX)
    n_xz = marginal_neighbors(xz, r)
    n_yz = marginal_neighbors(yz, r)
    n_z = marginal_neighbors(z, r)
    return psi(k) - np.mean(psi(n_xz+1) + psi(n_yz+1) - psi(n_z+1))


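# Illustrative sketch (not part of the hyeenna API): in the chain X -> Z -> Y,
# X and Y are conditionally independent given Z, so I(X; Y | Z) should be
# close to zero even though I(X; Y) is not. The helper name, seed, and sample
# size are arbitrary choices for illustration.
def _demo_cmi_markov_chain(n_samples: int = 2000) -> float:
    rng = np.random.default_rng(5)
    x = rng.normal(size=n_samples)
    z = x + 0.5 * rng.normal(size=n_samples)
    y = z + 0.5 * rng.normal(size=n_samples)
    return conditional_mutual_info(x, y, z, k=K)

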
def kl_divergence(P: np.array, Q: np.array, k: int=K):
    """
    Compute the KL divergence D(P||Q).

    Parameters
    ----------
    P: np.array
        Sample from random variable P
    Q: np.array
        Sample from random variable Q
    k: int, optional
        Number of neighbors to use in estimation

    Returns
    -------
    div: float
        estimated KL divergence D(P||Q)

    References
    ----------
    .. [0] Wang, Q., Kulkarni, S. R., & Verdu, S. (2006). A Nearest-Neighbor
           Approach to Estimating Divergence between Continuous Random
           Vectors. In 2006 IEEE International Symposium on Information
           Theory. https://doi.org/10.1109/ISIT.2006.261842
    """
    if len(P.shape) == 1:
        P = P.reshape(-1, 1)
    if len(Q.shape) == 1:
        Q = Q.reshape(-1, 1)
    nP, dP = P.shape
    nQ, dQ = Q.shape
    assert dP == dQ, "{} - {}".format(P.shape, Q.shape)
    # rho: distance from each sample of P to its kth nearest neighbor within P
    # (k+1 because each query point is returned as its own nearest neighbor).
    rho = nearest_distances(P, k=k+1, metric=METRIC)
    # nu: distance from each sample of P to its kth nearest neighbor in Q.
    nu = nearest_distances(Q, P, k=k, metric=METRIC)
    div = (dP * (np.mean(np.log(nu)) - np.mean(np.log(rho)))
           + np.log(nQ / (nP-1)))
    return div


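# Illustrative sketch (not part of the hyeenna API): for two univariate
# Gaussians N(mu1, 1) and N(mu2, 1) the true divergence is
# 0.5 * (mu1 - mu2)**2, giving a simple sanity check. The helper name, seed,
# and sample size are arbitrary choices for illustration.
def _demo_kl_divergence_gaussians(n_samples: int = 5000) -> float:
    rng = np.random.default_rng(6)
    p = rng.normal(loc=0.0, scale=1.0, size=n_samples)
    q = rng.normal(loc=1.0, scale=1.0, size=n_samples)
    # Analytic value: 0.5 * (0.0 - 1.0)**2 = 0.5 nats.
    return kl_divergence(p, q, k=K)

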
def transfer_entropy(X: np.array, Y: np.array, tau: int=1, omega: int=1,
                     k: int=1, l: int=1, neighbors: int=K,
                     **kwargs) -> float:
    """
    Compute the transfer entropy from a source variable, X, to a target
    variable, Y.

    Parameters
    ----------
    X: np.array
        Source sample from a random variable X
    Y: np.array
        Target sample from a random variable Y
    tau: int (default: 1)
        Number of timestep lags for the source variable
    omega: int (default: 1)
        Number of timestep lags for the target variable conditioning
    k: int (default: 1)
        Width of window for the source variable.
    l: int (default: 1)
        Width of window for the target variable conditioning.
    neighbors: int (default: K)
        Number of neighbors to use in estimation.
    **kwargs:
        Other arguments (undocumented, for internal usage)

    Returns
    -------
    transfer_entropy: float
        Computed via conditional_mutual_info

    References
    ----------
    .. [0] Schreiber, T. (2000). Measuring information transfer. Physical
           Review Letters, 85(2), 461–464.
           https://doi.org/10.1103/PhysRevLett.85.461
    """
    if len(X.shape) == 1:
        X = X.reshape(-1, 1)
    if len(Y.shape) == 1:
        Y = Y.reshape(-1, 1)
    # First usable timestep given the maximum lag and window width
    start = np.max([k, l]) + np.max([tau, omega])
    nX, dX = X.shape
    nY, dY = Y.shape
    x = Y[start:]
    y, z = [], []
    # Lagged windows of the source (y) and of the target's own history (z)
    for w in range(tau, tau+k):
        y.append(X[start-w:-w])
    for w in range(omega, omega+l):
        z.append(Y[start-w:-w])
    if k > 1:
        y = np.hstack(y)
    else:
        y = np.array(y).reshape(-1, 1)
    if l > 1:
        z = np.hstack(z)
    else:
        z = np.array(z).reshape(-1, 1)
    return conditional_mutual_info(x, y, z, k=neighbors)


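# Illustrative sketch (not part of the hyeenna API): a linear autoregressive
# coupling where Y is driven by lagged X should produce a clearly positive
# transfer entropy X -> Y and a much smaller one in the reverse direction.
# The helper name, seed, coupling strengths, and series length are arbitrary
# choices for illustration.
def _demo_transfer_entropy_coupled(n_samples: int = 2000) -> tuple:
    rng = np.random.default_rng(7)
    x = rng.normal(size=n_samples)
    y = np.zeros(n_samples)
    for t in range(1, n_samples):
        y[t] = 0.6 * y[t - 1] + 0.8 * x[t - 1] + 0.1 * rng.normal()
    return (transfer_entropy(x, y, tau=1, omega=1, neighbors=K),
            transfer_entropy(y, x, tau=1, omega=1, neighbors=K))

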
def conditional_transfer_entropy(X: np.array, Y: np.array, Z: np.array,
                                 tau: int=1, omega: int=1, nu: int=1,
                                 k: int=1, l: int=1, m: int=1,
                                 neighbors: int=K, **kwargs) -> float:
    """
    Compute the transfer entropy from a source variable, X, to a target
    variable, Y, conditioned on other variables contained in Z.

    Parameters
    ----------
    X: np.array
        Source sample from a random variable X
    Y: np.array
        Target sample from a random variable Y
    Z: np.array
        Conditioning variable(s).
    tau: int (default: 1)
        Number of timestep lags for the source variable
    omega: int (default: 1)
        Number of timestep lags for the target variable conditioning
    nu: int (default: 1)
        Number of timestep lags for the source variable conditioning
    k: int (default: 1)
        Width of window for the source variable.
    l: int (default: 1)
        Width of window for the target variable conditioning.
    m: int (default: 1)
        Width of window for the source variable conditioning.
    neighbors: int (default: K)
        Number of neighbors to use in estimation.
    **kwargs:
        Other arguments (undocumented, for internal usage)

    Returns
    -------
    conditional_transfer_entropy: float
        Computed via conditional_mutual_info

    References
    ----------
    .. [0] Schreiber, T. (2000). Measuring information transfer. Physical
           Review Letters, 85(2), 461–464.
           https://doi.org/10.1103/PhysRevLett.85.461
    """
    if len(X.shape) == 1:
        X = X.reshape(-1, 1)
    if len(Y.shape) == 1:
        Y = Y.reshape(-1, 1)
    if len(Z.shape) == 1:
        Z = Z.reshape(-1, 1)
    # First usable timestep given the maximum lag and window width
    start = np.max([k, l, m]) + np.max([tau, omega, nu])
    nZ, dZ = Z.shape
    x = Y[start:]
    nX, dX = x.shape
    y, z1, z2 = [], [], []
    # Lagged windows of the source (y), the target's own history (z1), and
    # the conditioning variable(s) (z2)
    for w in range(tau, tau+k):
        y.append(X[start-w:-w])
    for w in range(omega, omega+l):
        z1.append(Y[start-w:-w])
    for w in range(nu, nu+m):
        z2.append(Z[start-w:-w].reshape(nX, dX))
    if k > 1:
        y = np.hstack(y)
    else:
        y = np.array(y).reshape(-1, 1)
    if l > 1:
        z1 = np.hstack(z1)
    else:
        z1 = np.array(z1).reshape(-1, 1)
    if len(z2):
        z2 = np.hstack(z2)
        z = np.hstack([np.array(z1), np.array(z2)])
    else:
        z = z1
    return conditional_mutual_info(x, y, z, k=neighbors)


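# Illustrative sketch (not part of the hyeenna API): when X drives Z and Z in
# turn drives Y, the apparent transfer X -> Y should shrink once the estimate
# is conditioned on Z. The helper name, seed, couplings, and series length are
# arbitrary choices for illustration.
def _demo_conditional_transfer_entropy(n_samples: int = 2000) -> tuple:
    rng = np.random.default_rng(8)
    x = rng.normal(size=n_samples)
    z = np.zeros(n_samples)
    y = np.zeros(n_samples)
    for t in range(1, n_samples):
        z[t] = 0.8 * x[t - 1] + 0.1 * rng.normal()
        y[t] = 0.8 * z[t - 1] + 0.1 * rng.normal()
    return (transfer_entropy(x, y, neighbors=K),
            conditional_transfer_entropy(x, y, z, neighbors=K))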