
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# This file is part of pyunicorn.
# Copyright (C) 2008--2015 Jonathan F. Donges and pyunicorn authors
# URL: <http://www.pik-potsdam.de/members/donges/software>
# License: BSD (3-clause)

"""
Provides classes for analyzing spatially embedded complex networks, handling
multivariate data.
Written by Jakob Runge.
"""

import numpy                        # array object and fast numerics
from scipy import special, linalg   # special math functions

# import mpi                          # parallelized computations

from .. import weave_inline          # C++ inline code


#
#  Define class Coupling Analysis
#


class CouplingAnalysis(object):

    """
    Contains methods to calculate coupling matrices from large arrays of
    scalar time series.

    Comprises linear and information-theoretic measures, lagged and directed
    couplings.
    """

    #
    #  Definitions of internal methods
    #

    def __init__(self, data, silence_level=0):
        """
        Initialize an instance of CouplingAnalysis from data array.

        :type data: multidimensional numpy array
        :arg data: The time series array with time in first dimension.
        :type silence_level: int >= 0
        :arg silence_level: The higher, the less progress info is output.
        """
        self.silence_level = silence_level
        """(int>=0) higher -> less progress info"""

        #  Flatten array along spatial dimensions to allow
        #  for more convenient indexing
        self.n_time = data.shape[0]
        self.data = data.reshape(self.n_time, -1)
        self.N = self.data.shape[1]

        #  Precalculation of p*log(p) needed for entropies
        self.plogp = None

    def __str__(self):
        """Return a string representation of the CouplingAnalysis object."""
        return 'CouplingAnalysis: %i variables, %i timesteps.' % (
            self.N, self.n_time)

    @staticmethod
    def test_data():
        """
        Return example test data as discussed in the pyunicorn description
        paper.
        """
        numpy.random.seed(42)
        noise = numpy.random.randn(1000, 4)
        data = noise
        for t in range(2, 1000):
            data[t, 0] = 0.8*data[t-1, 0] + noise[t, 0]
            data[t, 1] = 0.8*data[t-1, 1] + 0.5*data[t-2, 0] + noise[t, 1]
            data[t, 2] = 0.7*data[t-1, 0] + noise[t, 2]
            data[t, 3] = 0.7*data[t-2, 0] + noise[t, 3]
        return data

    def symmetrize_by_absmax(self, similarity_matrix, lag_matrix):
        """
        Returns symmetrized similarity matrix.

        Computes the largest absolute value for each pair (i,j) and (j,i) and
        returns the in-place changed matrices of measures and lags. A negative
        lag for an entry (i,j) in the lag_matrix then indicates a 'direction'
        j --> i regarding the peak of the lag function, and vice versa for a
        positive lag.

        **Example:**

        >>> coup_ana = CouplingAnalysis(CouplingAnalysis.test_data())
        >>> similarity_matrix, lag_matrix = coup_ana.cross_correlation(
        ...     tau_max=2)
        >>> r((similarity_matrix, lag_matrix))
        (array([[ 1.    ,  0.698 ,  0.7788,  0.7535],
                [ 0.4848,  1.    ,  0.4507,  0.52  ],
                [ 0.6219,  0.5704,  1.    ,  0.5996],
                [ 0.4833,  0.5503,  0.5002,  1.    ]]),
         array([[0, 2, 1, 2],
                [0, 0, 0, 0],
                [0, 2, 0, 1],
                [0, 2, 0, 0]]))
        >>> r(coup_ana.symmetrize_by_absmax(similarity_matrix, lag_matrix))
        (array([[ 1.    ,  0.698 ,  0.7788,  0.7535],
                [ 0.698 ,  1.    ,  0.5704,  0.5503],
                [ 0.7788,  0.5704,  1.    ,  0.5996],
                [ 0.7535,  0.5503,  0.5996,  1.    ]]),
         array([[ 0,  2,  1,  2],
                [-2,  0, -2, -2],
                [-1,  2,  0,  1],
                [-2,  2, -1,  0]]))

        :type similarity_matrix: array-like [float]
        :arg similarity_matrix: array-like [node, node] matrix of similarity
            estimates
        :type lag_matrix: array-like [int>=0]
        :arg lag_matrix: array-like [node, node] matrix of lags
        :rtype: tuple of arrays
        :returns: the value at the absolute maximum and the (pos or neg) lag.
        """
        N = self.N

        code = r"""
        int i,j;

        // loop over all node pairs
        for (i = 0; i < N; i++) {
            for (j = i+1; j < N; j++) {
                // keep the entry with the larger absolute value and flip
                // the sign of the lag for the mirrored entry
                if (fabs(similarity_matrix(i,j)) >
                        fabs(similarity_matrix(j,i))) {
                    similarity_matrix(j,i) = similarity_matrix(i,j);
                    lag_matrix(j,i) = -lag_matrix(i,j);
                }
                else {
                    similarity_matrix(i,j) = similarity_matrix(j,i);
                    lag_matrix(i,j) = -lag_matrix(j,i);
                }
            }
        }
        """
        weave_inline(locals(), code,
                     ['similarity_matrix', 'lag_matrix', 'N'])
        return similarity_matrix, lag_matrix
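
    #  Illustrative sketch: a pure-Python transcription of the inlined C
    #  loop in symmetrize_by_absmax. Slower, but it needs no compiler and
    #  can serve as a cross-check; the helper name is illustrative only.
    @staticmethod
    def _symmetrize_by_absmax_python(similarity_matrix, lag_matrix):
        """Compiler-free version of :meth:`symmetrize_by_absmax`."""
        N = similarity_matrix.shape[0]
        for i in range(N):
            for j in range(i + 1, N):
                #  keep the entry with the larger absolute value and flip
                #  the sign of the lag for the mirrored entry
                if abs(similarity_matrix[i, j]) > \
                        abs(similarity_matrix[j, i]):
                    similarity_matrix[j, i] = similarity_matrix[i, j]
                    lag_matrix[j, i] = -lag_matrix[i, j]
                else:
                    similarity_matrix[i, j] = similarity_matrix[j, i]
                    lag_matrix[i, j] = -lag_matrix[j, i]
        return similarity_matrix, lag_matrix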

    #
    #  Define methods to estimate similarity measures
    #

    def cross_correlation(self, tau_max=0, lag_mode='max'):
        r"""
        Return cross correlation between all pairs of nodes.

        Two lag-modes are available (default: lag_mode='max'):

        lag_mode = 'all':
        Return 3-dimensional array of lagged cross correlations between all
        pairs of nodes. An entry :math:`(i, j, \tau)` corresponds to
        :math:`\rho(X^i_{t-\tau}, X^j_t)` for positive lags tau, i.e., the
        direction i --> j for :math:`\tau \ne 0`.

        lag_mode = 'max':
        Return matrix of absolute maxima and corresponding lags of lagged
        cross correlation (CC) between all pairs of nodes.
        Returns two usually asymmetric matrices of CC values and lags: In
        each matrix, an entry :math:`(i, j)` corresponds to the (positive or
        negative) value and lag, respectively, at absolute maximum of
        :math:`\rho(X^i_{t-\tau}, X^j_t)` for positive lags tau, i.e., the
        direction i --> j for :math:`\tau > 0`. The matrices are, thus,
        asymmetric. The function :meth:`.symmetrize_by_absmax` can be used
        to obtain a symmetric matrix.

        **Example:**

        >>> coup_ana = CouplingAnalysis(CouplingAnalysis.test_data())
        >>> similarity_matrix, lag_matrix = coup_ana.cross_correlation(
        ...     tau_max=5, lag_mode='max')
        >>> r((similarity_matrix, lag_matrix))
        (array([[ 1.    ,  0.757 ,  0.779 ,  0.7536],
                [ 0.4847,  1.    ,  0.4502,  0.5197],
                [ 0.6219,  0.5844,  1.    ,  0.5992],
                [ 0.4827,  0.5509,  0.4996,  1.    ]]),
         array([[0, 4, 1, 2],
                [0, 0, 0, 0],
                [0, 3, 0, 1],
                [0, 2, 0, 0]]))

        :type tau_max: int [int>=0]
        :arg tau_max: maximum lag of cross correlation lag function.
        :type lag_mode: str [('max'|'all')]
        :arg lag_mode: lag-mode of cross correlations to return.
        :rtype: 3D-array or tuple of matrices
        :returns: all-lag array or matrices of value and lag at the absolute
            maximum.
        """
        data = self.data
        T, N = data.shape

        #  Sanity checks
        if not isinstance(data, numpy.ndarray):
            raise TypeError("data is of type %s, " % type(data) +
                            "must be numpy.ndarray")
        if N > T:
            print("Warning: data.shape = %s," % str(data.shape) +
                  " is it of shape (observations, variables) ?")
        if numpy.isnan(data).sum() != 0:
            raise ValueError("NaNs in the data")
        if tau_max < 0:
            raise ValueError("tau_max = %d, " % (tau_max) +
                             "but 0 <= tau_max")
        if lag_mode not in ['max', 'all']:
            raise ValueError("lag_mode = %s, " % (lag_mode) +
                             "but must be one of 'max', 'all'")

        #  Normalize time series to zero mean and unit variance for all lags
        corr_range = T - tau_max
        array = numpy.empty((tau_max + 1, N, corr_range), dtype="float32")

        for t in range(tau_max + 1):
            #  Remove mean value from time series at each node
            array[t] = numpy.fastCopyAndTranspose(
                data[t:t+corr_range, :]
                - data[t:t+corr_range, :].mean(axis=0).reshape(1, N))
            #  Normalize the variance of anomalies to one
            array[t] /= array[t].std(axis=1).reshape(N, 1)
            #  Correct for nodes with zero variance in their time series
            array[t][numpy.isnan(array[t])] = 0

        if lag_mode == 'max':
            similarity_matrix = numpy.ones((self.N, self.N), dtype='float32')
            lag_matrix = numpy.zeros((self.N, self.N), dtype='int8')

            code = r"""
            int i,j,tau,k, argmax;
            double crossij, max;

            // loop over all node pairs, NOT symmetric due to time shifts!
            for (i = 0; i < N; i++) {
                for (j = 0; j < N; j++) {
                    if( i != j){
                        max = 0.0;
                        argmax = 0;
                        // loop over taus INCLUDING the last tau value
                        for( tau = 0; tau < tau_max + 1; tau++) {
                            crossij = 0;
                            // here the actual cross correlation is
                            // calculated, assuming standardized arrays
                            for ( k = 0; k < corr_range; k++) {
                                crossij += array(tau, i, k) *
                                           array(tau_max, j, k);
                            }
                            // calculate max and argmax by comparing to
                            // previous value and storing max
                            if (fabs(crossij) > fabs(max)) {
                                max = crossij;
                                argmax = tau;
                            }
                        }
                        similarity_matrix(i,j) = max/(float)(corr_range);
                        lag_matrix(i,j) = tau_max - argmax;
                    }
                }
            }
            """
            weave_inline(locals(), code,
                         ['array', 'similarity_matrix', 'lag_matrix',
                          'N', 'tau_max', 'corr_range'])
            return similarity_matrix, lag_matrix

        elif lag_mode == 'all':
            lagfuncs = numpy.zeros((self.N, self.N, tau_max+1),
                                   dtype='float32')

            code = r"""
            int i,j,tau,k;
            double crossij;

            // loop over all node pairs, NOT symmetric due to time shifts!
            for (i = 0; i < N; i++) {
                for (j = 0; j < N; j++) {
                    // loop over taus INCLUDING the last tau value
                    for( tau = 0; tau < tau_max + 1; tau++) {
                        crossij = 0;
                        // here the actual cross correlation is calculated,
                        // assuming standardized arrays
                        for ( k = 0; k < corr_range; k++) {
                            crossij += array(tau, i, k) *
                                       array(tau_max, j, k);
                        }
                        lagfuncs(i,j,tau_max-tau) =
                            crossij/(float)(corr_range);
                    }
                }
            }
            """
            weave_inline(locals(), code,
                         ['array', 'lagfuncs', 'N', 'tau_max',
                          'corr_range'])
            return lagfuncs
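
    #  Illustrative sketch: what the C kernels above compute for a single
    #  pair (i, j), written in plain NumPy. The helper name is illustrative
    #  only; each lagged window is standardized separately, as in the
    #  kernels.
    def _cross_correlation_pair(self, i, j, tau_max=0):
        """Lag function rho(X^i_{t-tau}, X^j_t) for tau = 0, ..., tau_max."""
        data = self.data
        T = self.n_time
        corr_range = T - tau_max
        lagfunc = numpy.zeros(tau_max + 1)
        for tau in range(tau_max + 1):
            #  X^i shifted tau steps into the past against X^j at the
            #  latest possible alignment
            x = data[tau_max - tau:tau_max - tau + corr_range, i]
            y = data[tau_max:T, j]
            x = (x - x.mean()) / x.std()
            y = (y - y.mean()) / y.std()
            lagfunc[tau] = (x * y).mean()
        return lagfunc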

    def mutual_information(self, tau_max=0, estimator='knn', knn=10, bins=6,
                           lag_mode='max'):
        r"""
        Return mutual information (MI) between all pairs of nodes.

        Three estimators are available:

        estimator = 'knn' (Recommended):
        Based on k-nearest-neighbors [Kraskov2004]_, version 1 in their
        paper. Larger k have smaller variance, but larger (typically
        negative) bias, and vice versa.

        estimator = 'binning':
        Binning estimator based on equal-quantile binning.

        estimator = 'gauss':
        Captures only linear part of association. Essentially estimates a
        transformed partial correlation.

        Two lag-modes are available (default: lag_mode='max'):

        lag_mode = 'all':
        Return 3-dimensional array of lagged MI between all pairs of nodes.
        An entry :math:`(i, j, \tau)` corresponds to
        :math:`I(X^i_{t-\tau}, X^j_t)` for positive lags tau, i.e., the
        direction i --> j for :math:`\tau \ne 0`.

        lag_mode = 'max':
        Return matrix of absolute maxima and corresponding lags of lagged MI
        between all pairs of nodes.
        Returns two usually asymmetric matrices of MI values and lags: In
        each matrix, an entry :math:`(i, j)` corresponds to the value and
        lag, respectively, at absolute maximum of
        :math:`I(X^i_{t-\tau}, X^j_t)` for positive lags tau, i.e., the
        direction i --> j for :math:`\tau > 0`. The matrices are, thus,
        asymmetric. The function :meth:`.symmetrize_by_absmax` can be used
        to obtain a symmetric matrix.

        Reference: [Kraskov2004]_

        **Example:**

        >>> coup_ana = CouplingAnalysis(CouplingAnalysis.test_data())
        >>> similarity_matrix, lag_matrix = coup_ana.mutual_information(
        ...     tau_max=5, knn=10, estimator='knn')
        >>> similarity_matrix, lag_matrix
        (array([[ 4.65048742,  0.43874303,  0.46520019,  0.41257444],
                [ 0.14704162,  4.65048742,  0.10645443,  0.16393046],
                [ 0.24829103,  0.2125767 ,  4.65048742,  0.22044939],
                [ 0.12093173,  0.19902836,  0.14530452,  4.65048742]],
               dtype=float32),
         array([[0, 4, 1, 2],
                [0, 0, 0, 0],
                [0, 2, 0, 1],
                [0, 2, 0, 0]], dtype=int8))

        :type tau_max: int [int>=0]
        :arg tau_max: maximum lag of MI lag function.
        :type knn: int [int>=1]
        :arg knn: nearest-neighbor MI estimation parameter. (default: 10)
        :type bins: int [int>=2]
        :arg bins: binning MI estimation parameter. (default: 6)
        :type estimator: str [('knn'|'binning'|'gauss')]
        :arg estimator: MI estimator. (default: 'knn')
        :type lag_mode: str [('max'|'all')]
        :arg lag_mode: lag-mode of MI to return.
        :rtype: 3D-array or tuple of matrices
        :returns: all-lag array or matrices of value and lag at the absolute
            maximum.
        """
        data = self.data
        T, N = data.shape

        #  Sanity checks
        if not isinstance(data, numpy.ndarray):
            raise TypeError("data is of type %s, " % type(data) +
                            "must be numpy.ndarray")
        if N > T:
            print("Warning: data.shape = %s," % str(data.shape) +
                  " is it of shape (observations, variables) ?")
        if T < 500:
            print("Warning: T = %s," % str(T) +
                  " unreliable estimation using MI estimator")
        if numpy.isnan(data).sum() != 0:
            raise ValueError("NaNs in the data")
        if tau_max < 0:
            raise ValueError("tau_max = %d, " % (tau_max) +
                             "but 0 <= tau_max")
        if estimator == 'knn':
            if knn > T/2. or knn < 1:
                raise ValueError("knn = %s, " % str(knn) +
                                 "should be between 1 and T/2")

        if lag_mode == 'max':
            similarity_matrix = numpy.ones((N, N), dtype='float32')
            lag_matrix = numpy.zeros((N, N), dtype='int8')
        elif lag_mode == 'all':
            lagfuncs = numpy.zeros((N, N, tau_max+1), dtype='float32')

        if estimator == 'binning':
            self.plogp = self.create_plogp(T)

        for i in range(N):
            for j in range(N):
                maximum = 0.
                lag_at_max = 0
                for tau in range(tau_max + 1):

                    X = [(i, -tau)]
                    Y = [(j, 0)]
                    Z = []

                    XYZ = X + Y + Z

                    dim = len(XYZ)
                    max_lag = tau_max
                    array = numpy.zeros((dim, T - max_lag))
                    for d, node in enumerate(XYZ):
                        var, lag = node
                        array[d, :] = data[max_lag + lag: T + lag, var]

                    if estimator == 'knn':
                        xyz = numpy.array([0, 1])

                        k_xz, k_yz, k_z = self._get_nearest_neighbors(
                            array=array, xyz=xyz, k=knn, standardize=True)

                        ixy_z = (special.digamma(knn)
                                 + (- special.digamma(k_xz)
                                    - special.digamma(k_yz)
                                    + special.digamma(k_z)).mean())

                    elif estimator == 'binning':
                        symb_array = self._quantile_bin_array(array,
                                                              bins=bins)

                        #  High-dimensional histogram
                        hist = self.bincount_hist(symb_array)

                        #  Entropies by use of vectorized function plogp
                        hxyz = (-self.plogp(hist).sum()
                                + self.plogp(T)) / float(T)
                        hxz = (-self.plogp(hist.sum(axis=1)).sum()
                               + self.plogp(T)) / float(T)
                        hyz = (-self.plogp(hist.sum(axis=0)).sum()
                               + self.plogp(T)) / float(T)
                        hz = (-self.plogp(
                            hist.sum(axis=0).sum(axis=0)).sum()
                            + self.plogp(T)) / float(T)

                        ixy_z = hxz + hyz - hz - hxyz

                    elif estimator == 'gauss':
                        #  Standardize
                        array -= array.mean(axis=1).reshape(dim, 1)
                        array /= array.std(axis=1).reshape(dim, 1)
                        if numpy.isnan(array).sum() != 0:
                            raise ValueError("nans after standardizing, "
                                             "possibly constant array!")

                        x = array[0, :]
                        y = array[1, :]

                        ixy_z = self._par_corr_to_cmi(
                            numpy.dot(x, y) / numpy.sqrt(
                                numpy.dot(x, x) * numpy.dot(y, y)))

                    if lag_mode == 'max':
                        if ixy_z > maximum:
                            maximum = ixy_z
                            lag_at_max = tau
                    elif lag_mode == 'all':
                        lagfuncs[i, j, tau] = ixy_z

                if lag_mode == 'max':
                    similarity_matrix[i, j] = maximum
                    lag_matrix[i, j] = lag_at_max

        if lag_mode == 'max':
            return similarity_matrix, lag_matrix
        elif lag_mode == 'all':
            return lagfuncs
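
    #  Illustrative sketch: with empty condition Z, the 'binning' branch
    #  above reduces to the textbook identity I(X;Y) = H(X) + H(Y) - H(X,Y).
    #  This helper (name illustrative only) computes that directly from an
    #  unnormalized 2D histogram of counts, in nats.
    @staticmethod
    def _mi_from_2d_hist(hist):
        """Mutual information from a 2D histogram of counts."""
        pxy = hist / float(hist.sum())

        def entropy(p):
            #  Shannon entropy in nats, ignoring empty bins
            p = p[p > 0]
            return -(p * numpy.log(p)).sum()

        return (entropy(pxy.sum(axis=1)) + entropy(pxy.sum(axis=0))
                - entropy(pxy))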

    def information_transfer(self, tau_max=0, estimator='knn', knn=10,
                             past=1, cond_mode='ity', lag_mode='max'):
        r"""
        Return bivariate information transfer between all pairs of nodes.

        Two condition modes of information transfer are available as
        described in [Runge2012b]_.

        Information transfer to Y (ITY):

        .. math:: I(X^i_{t-\tau}, X^j_t | X^j_{t-1}, \ldots, X^j_{t-past})

        Momentary information transfer (MIT):

        .. math:: I(X^i_{t-\tau}, X^j_t | X^j_{t-1}, \ldots, X^j_{t-past},
                     X^i_{t-\tau-1}, \ldots, X^i_{t-\tau-past})

        Two estimators are available:

        estimator = 'knn' (Recommended):
        Based on k-nearest-neighbors [Kraskov2004]_, version 1 in their
        paper. Larger k have smaller variance, but larger (typically
        negative) bias, and vice versa.

        estimator = 'gauss':
        Captures only linear part of association. Essentially estimates a
        transformed partial correlation.

        Two lag-modes are available (default: lag_mode='max'):

        lag_mode = 'all':
        Return 3-dimensional array of lag-functions between all pairs of
        nodes. An entry :math:`(i, j, \tau)` corresponds to
        :math:`I(X^i_{t-\tau}, X^j_t | ...)` for positive lags tau, i.e.,
        the direction i --> j for :math:`\tau \ne 0`.

        lag_mode = 'max':
        Return matrix of absolute maxima and corresponding lags of
        lag-functions between all pairs of nodes.
        Returns two usually asymmetric matrices of values and lags: In each
        matrix, an entry :math:`(i, j)` corresponds to the value and lag,
        respectively, at absolute maximum of
        :math:`I(X^i_{t-\tau}, X^j_t | ...)` for positive lags tau, i.e.,
        the direction i --> j for :math:`\tau > 0`. The matrices are, thus,
        asymmetric. The function :meth:`.symmetrize_by_absmax` can be used
        to obtain a symmetric matrix.

        **Example:**

        >>> coup_ana = CouplingAnalysis(CouplingAnalysis.test_data())
        >>> similarity_matrix, lag_matrix = coup_ana.information_transfer(
        ...     tau_max=5, estimator='knn', knn=10)
        >>> r((similarity_matrix, lag_matrix))
        (array([[ 0.    ,  0.1544,  0.3261,  0.3047],
                [ 0.0218,  0.    ,  0.0394,  0.0976],
                [ 0.0134,  0.0663,  0.    ,  0.1502],
                [ 0.0066,  0.0694,  0.0401,  0.    ]]),
         array([[0, 2, 1, 2],
                [5, 0, 0, 0],
                [5, 1, 0, 1],
                [5, 0, 0, 0]]))

        :type tau_max: int [int>=0]
        :arg tau_max: maximum lag of ITY lag function.
        :type past: int [int>=1]
        :arg past: maximum lag of past history.
        :type knn: int [int>=1]
        :arg knn: nearest-neighbor ITY estimation parameter. (default: 10)
        :type estimator: str [('knn'|'gauss')]
        :arg estimator: ITY estimator. (default: 'knn')
        :type cond_mode: str [('ity'|'mit')]
        :arg cond_mode: condition mode. (default: 'ity')
        :type lag_mode: str [('max'|'all')]
        :arg lag_mode: lag-mode of ITY to return.
        :rtype: 3D-array or tuple of matrices
        :returns: all-lag array or matrices of value and lag at the absolute
            maximum.
        """
        data = self.data
        T, N = data.shape

        #  Sanity checks
        if not isinstance(data, numpy.ndarray):
            raise TypeError("data is of type %s, " % type(data) +
                            "must be numpy.ndarray")
        if N > T:
            print("Warning: data.shape = %s," % str(data.shape) +
                  " is it of shape (observations, variables) ?")
        if estimator == 'knn' and T < 500:
            print("Warning: T = %s," % str(T) +
                  " unreliable estimation using knn-estimator")
        if numpy.isnan(data).sum() != 0:
            raise ValueError("NaNs in the data")
        if tau_max < 0:
            raise ValueError("tau_max = %d, " % (tau_max) +
                             "but 0 <= tau_max")
        if estimator == 'knn':
            if knn > T/2. or knn < 1:
                raise ValueError("knn = %s, " % str(knn) +
                                 "should be between 1 and T/2")

        if lag_mode == 'max':
            similarity_matrix = numpy.ones((N, N), dtype='float32')
            lag_matrix = numpy.zeros((N, N), dtype='int8')
        elif lag_mode == 'all':
            lagfuncs = numpy.zeros((N, N, tau_max+1), dtype='float32')

        for i in range(N):
            for j in range(N):
                maximum = 0.
                lag_at_max = 0
                for tau in range(tau_max + 1):

                    X = [(i, -tau)]
                    Y = [(j, 0)]
                    if cond_mode == 'ity':
                        Z = [(j, -p) for p in range(1, past + 1)]
                    elif cond_mode == 'mit':
                        Z = [(j, -p) for p in range(1, past + 1)]
                        Z += [(i, -tau - p) for p in range(1, past + 1)]

                    XYZ = X + Y + Z

                    dim = len(XYZ)
                    max_lag = tau_max + past
                    array = numpy.zeros((dim, T - max_lag))
                    for d, node in enumerate(XYZ):
                        var, lag = node
                        array[d, :] = data[max_lag + lag: T + lag, var]

                    if estimator == 'knn':
                        xyz = numpy.array([0, 1])

                        k_xz, k_yz, k_z = self._get_nearest_neighbors(
                            array=array, xyz=xyz, k=knn, standardize=True)

                        ixy_z = (special.digamma(knn)
                                 + (- special.digamma(k_xz)
                                    - special.digamma(k_yz)
                                    + special.digamma(k_z)).mean())

                    elif estimator == 'gauss':
                        if numpy.isnan(array).sum() != 0:
                            raise ValueError("nans in the array!")

                        #  Standardize
                        array -= array.mean(axis=1).reshape(dim, 1)
                        array /= array.std(axis=1).reshape(dim, 1)
                        if numpy.isnan(array).sum() != 0:
                            raise ValueError("nans after standardizing, "
                                             "possibly constant array!")

                        x = array[0, :]
                        y = array[1, :]
                        if len(array) > 2:
                            confounds = array[2:, :]
                            ortho_confounds = linalg.qr(
                                numpy.fastCopyAndTranspose(confounds),
                                mode='economic')[0].T
                            x -= numpy.dot(
                                numpy.dot(ortho_confounds, x),
                                ortho_confounds)
                            y -= numpy.dot(
                                numpy.dot(ortho_confounds, y),
                                ortho_confounds)

                        ixy_z = self._par_corr_to_cmi(
                            numpy.dot(x, y) / numpy.sqrt(
                                numpy.dot(x, x) * numpy.dot(y, y)))

                    if lag_mode == 'max':
                        if ixy_z > maximum:
                            maximum = ixy_z
                            lag_at_max = tau
                    elif lag_mode == 'all':
                        lagfuncs[i, j, tau] = ixy_z

                if lag_mode == 'max':
                    similarity_matrix[i, j] = maximum
                    lag_matrix[i, j] = lag_at_max

        if lag_mode == 'max':
            similarity_matrix[range(N), range(N)] = 0.
        elif lag_mode == 'all':
            lagfuncs[range(N), range(N), 0] = 0.

        if lag_mode == 'max':
            return similarity_matrix, lag_matrix
        elif lag_mode == 'all':
            return lagfuncs

    #
    #  Define helper methods
    #
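
    #  Illustrative sketch of the 'gauss' branch above: the conditions Z
    #  are removed from x and y by projecting onto an orthonormal basis of
    #  Z (QR decomposition) and subtracting; the correlation of the
    #  residuals is the partial correlation. Helper name illustrative only;
    #  inputs are assumed standardized as in the method above.
    @staticmethod
    def _partial_corr_qr(x, y, z):
        """Partial correlation of x and y given the rows of z."""
        #  Orthonormal basis of the space spanned by the conditions
        ortho_z = linalg.qr(z.T, mode='economic')[0].T
        x_res = x - numpy.dot(numpy.dot(ortho_z, x), ortho_z)
        y_res = y - numpy.dot(numpy.dot(ortho_z, y), ortho_z)
        return numpy.dot(x_res, y_res) / numpy.sqrt(
            numpy.dot(x_res, x_res) * numpy.dot(y_res, y_res))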

    @staticmethod
    def _par_corr_to_cmi(par_corr):
        """
        Transformation of partial correlation to conditional mutual
        information scale using the (multivariate) Gaussian assumption.

        :type par_corr: float or array
        :arg par_corr: partial correlation
        :rtype: float
        :returns: transformed partial correlation.
        """
        return -0.5 * numpy.log(1. - par_corr**2)
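
    #  Illustrative companion (name illustrative only): the inverse of the
    #  Gaussian transformation above. For jointly Gaussian variables,
    #  I(X;Y|Z) = -0.5*log(1 - rho^2) nats, e.g. rho = 0.5 gives
    #  -0.5*log(0.75) ~ 0.1438 nats; inverting this recovers |rho| from a
    #  CMI value.
    @staticmethod
    def _cmi_to_par_corr(cmi):
        """Inverse of :meth:`_par_corr_to_cmi`, returns abs(par_corr)."""
        return numpy.sqrt(1. - numpy.exp(-2. * cmi))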

    @staticmethod
    def _get_nearest_neighbors(array, xyz, k, standardize=True):
        """
        Returns nearest-neighbors for conditional mutual information
        estimator.

        Reference: [Kraskov2004]_

        :type array: array (float)
        :arg array: data array.
        :type xyz: array [int(0|1|2)]
        :arg xyz: identifier of X, Y, Z in CMI
        :type k: int [int>=1]
        :arg k: nearest-neighbor MI estimation parameter.
        :type standardize: bool
        :arg standardize: standardize array before estimation.
            (default: True)
        :rtype: tuple of arrays
        :returns: nearest neighbors for each sample point.
        """
        dim, T = array.shape

        if standardize:
            #  Standardize
            array = array.astype('float32')
            array -= array.mean(axis=1).reshape(dim, 1)
            array /= array.std(axis=1).reshape(dim, 1)
            #  A constant time series has zero variance and produces NaNs
            #  here, so raise an informative error
            if numpy.isnan(array).sum() != 0:
                raise ValueError("nans after standardizing, "
                                 "possibly constant array!")

        #  Add noise to destroy ties...
        array += 1E-10 * numpy.random.rand(array.shape[0], array.shape[1])

        #  Flatten for fast weave.inline access
        array = array.flatten()

        dim_x = int(numpy.where(xyz == 0)[0][-1] + 1)
        dim_y = int(numpy.where(xyz == 1)[0][-1] + 1 - dim_x)
        # dim_z = dim - dim_x - dim_y

        #  Initialize
        k_xz = numpy.zeros(T, dtype='int32')
        k_yz = numpy.zeros(T, dtype='int32')
        k_z = numpy.zeros(T, dtype='int32')

        code = """
        int i, j, index=0, t, m, n, d, kxz, kyz, kz, indexfound[T];
        double dz=0., dxyz=0., dx=0., dy=0., eps, epsmax;
        double dist[T*dim], dxyzarray[k+1];

        // Loop over time
        for(i = 0; i < T; i++){

            // Growing cube algorithm: Test if n = #(points in epsilon-
            // environment of reference point i) > k
            // Start with epsilon for which 95% of points are inside the
            // cube for a multivariate Gaussian; eps is doubled below,
            // including for the initial value
            eps = 1.*pow( float(k)/float(T), 1./dim);

            // n counts the number of neighbors
            n = 0;
            while(n <= k){
                // Increase cube size
                eps *= 2.;
                // Start with zero again
                n = 0;
                // Loop through all points
                for(t = 0; t < T; t++){
                    d = 0;
                    while(d < dim
                          && fabs(array[d*T + i] - array[d*T + t]) < eps){
                        d += 1;
                    }
                    // If all distances are within eps, the point t lies
                    // within eps and n is incremented
                    if(d == dim){
                        indexfound[n] = t;
                        n += 1;
                    }
                }
            }

            // Calculate distance to points only within epsilon
            // environment according to maximum metric
            for(j = 0; j < n; j++){
                index = indexfound[j];

                dxyz = 0.;
                for(d = 0; d < dim; d++){
                    dist[d*T + j] = fabs(array[d*T + i]
                                         - array[d*T + index]);
                    dxyz = fmax( dist[d*T + j], dxyz);
                }

                // Use insertion sort
                dxyzarray[j] = dxyz;
                if ( j > 0 ){
                    // only the list of the smallest k+1 distances
                    // needs to be kept!
                    m = fmin(k, j-1);
                    while ( m >= 0 && dxyzarray[m] > dxyz ){
                        dxyzarray[m+1] = dxyzarray[m];
                        m -= 1;
                    }
                    dxyzarray[m+1] = dxyz;
                }
            }

            // Epsilon of k-th nearest neighbor in joint space
            epsmax = dxyzarray[k];

            // Count neighbors within epsmax in subspaces; since the
            // reference point is included, all counts are at least 1
            kz = 0;
            kxz = 0;
            kyz = 0;
            for(j = 0; j < T; j++){

                // X-subspace
                dx = fabs(array[0*T + i] - array[0*T + j]);
                for(d = 1; d < dim_x; d++){
                    dist[d*T + j] = fabs(array[d*T + i] - array[d*T + j]);
                    dx = fmax( dist[d*T + j], dx);
                }

                // Y-subspace
                dy = fabs(array[dim_x*T + i] - array[dim_x*T + j]);
                for(d = dim_x; d < dim_x+dim_y; d++){
                    dist[d*T + j] = fabs(array[d*T + i] - array[d*T + j]);
                    dy = fmax( dist[d*T + j], dy);
                }

                // Z-subspace, if empty, dz stays 0
                dz = 0.;
                for(d = dim_x+dim_y; d < dim ; d++){
                    dist[d*T + j] = fabs(array[d*T + i] - array[d*T + j]);
                    dz = fmax( dist[d*T + j], dz);
                }

                // For no conditions, kz is counted up to T
                if (dz < epsmax){
                    kz += 1;
                    if (dx < epsmax){
                        kxz += 1;
                    }
                    if (dy < epsmax){
                        kyz += 1;
                    }
                }
            }

            // Write to numpy arrays
            k_xz[i] = kxz;
            k_yz[i] = kyz;
            k_z[i] = kz;
        }
        """
        weave_inline(locals(), code,
                     ['array', 'T', 'dim_x', 'dim_y', 'k', 'dim',
                      'k_xz', 'k_yz', 'k_z'],
                     blitz=False, headers=["<math.h>"])
        return k_xz, k_yz, k_z
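
    #  Illustrative sketch: a brute-force NumPy version of the neighbor
    #  counting above (helper name illustrative only). It follows the same
    #  logic -- maximum metric, strict '<', reference point included --
    #  but stores all pairwise distances, so memory grows as O(T**2).
    @staticmethod
    def _get_nearest_neighbors_numpy(array, xyz, k):
        """Brute-force version of :meth:`_get_nearest_neighbors`."""
        dim, T = array.shape
        dim_x = int(numpy.where(xyz == 0)[0][-1] + 1)
        dim_y = int(numpy.where(xyz == 1)[0][-1] + 1 - dim_x)

        #  Pairwise coordinate distances, shape (dim, T, T)
        dists = numpy.abs(array[:, :, None] - array[:, None, :])
        #  Chebyshev (maximum-metric) distances in the joint space and in
        #  the XZ- and YZ-subspaces
        dxyz = dists.max(axis=0)
        dxz = numpy.concatenate(
            (dists[:dim_x], dists[dim_x + dim_y:])).max(axis=0)
        dyz = dists[dim_x:].max(axis=0)

        #  Distance of each point to its k-th nearest neighbor in the
        #  joint space (column 0 of the sorted rows is the point itself)
        epsmax = numpy.sort(dxyz, axis=1)[:, k].reshape(T, 1)

        k_xz = (dxz < epsmax).sum(axis=1).astype('int32')
        k_yz = (dyz < epsmax).sum(axis=1).astype('int32')
        if dim > dim_x + dim_y:
            dz = dists[dim_x + dim_y:].max(axis=0)
            k_z = (dz < epsmax).sum(axis=1).astype('int32')
        else:
            #  No conditions: every point lies within any epsilon
            k_z = numpy.full(T, T, dtype='int32')
        return k_xz, k_yz, k_z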

    @staticmethod
    def _quantile_bin_array(array, bins=6):
        """
        Returns symbolified array with equi-quantile binning.

        This partition results in a uniform distribution of the marginals.

        :type array: array
        :arg array: data
        :type bins: int
        :arg bins: number of bins
        :rtype: array
        :returns: converted data
        """
        dim, T = array.shape

        #  get the bin quantile steps
        bin_edge = int(numpy.ceil(T/float(bins)))

        #  get the lower edges of the bins for every time series
        edges = numpy.sort(array, axis=1)[:, ::bin_edge]
        bins = edges.shape[1]

        #  This gives the symbolic time series
        symb_array = (array.reshape(dim, T, 1)
                      >= edges.reshape(dim, 1, bins)).sum(axis=2) - 1

        return symb_array.astype('int32')
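
    #  Illustrative usage: with bins=2, equi-quantile binning maps values
    #  below the median to symbol 0 and the rest to symbol 1, giving
    #  uniform marginals:
    #
    #      >>> arr = numpy.array([[1., 5., 3., 2., 6., 4.]])
    #      >>> CouplingAnalysis._quantile_bin_array(arr, bins=2)
    #      array([[0, 1, 0, 0, 1, 1]], dtype=int32)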

    @staticmethod
    def bincount_hist(symb_array):
        """
        Computes histogram from symbolic array.

        :type symb_array: array of integers
        :arg symb_array: symbolic data
        :rtype: array
        :returns: (unnormalized) histogram
        """
        base = int(symb_array.max() + 1)

        D, T = symb_array.shape

        #  Needed because numpy.bincount cannot process longs
        assert isinstance(base**D, int)
        assert base**D * 16. / 8. / 1024.**3 < 3., (
            'Dimension exceeds 3 GB of necessary memory '
            '(change this code line if you got more...)')
        assert D * base**D < 2**65, (
            'base = %d, D = %d: Histogram failed: '
            'dimension D*base**D exceeds int64 data type') % (base, D)

        flathist = numpy.zeros((base**D), dtype='int16')
        multisymb = numpy.zeros(T, dtype='int64')

        for i in range(D):
            multisymb += symb_array[i, :] * base**i

        result = numpy.bincount(multisymb)
        flathist[:len(result)] += result

        return flathist.reshape(tuple([base, base] +
                                      [base for i in range(D-2)])).T
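
    #  Illustrative usage: two binary symbol sequences give a 2x2
    #  contingency table; after the final transpose, entry (x, y) counts
    #  occurrences of the symbol pair (x, y):
    #
    #      >>> symb = numpy.array([[0, 0, 1, 1, 1],
    #      ...                     [0, 1, 0, 0, 1]], dtype='int32')
    #      >>> CouplingAnalysis.bincount_hist(symb)
    #      array([[1, 1],
    #             [2, 1]], dtype=int16)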

    @staticmethod
    def create_plogp(T):
        """
        Precalculation of p*log(p) needed for entropies.

        :type T: int
        :arg T: sample length
        :rtype: vectorized function
        :returns: p*log(p) for integer p from 0 to T
        """
        gfunc = numpy.zeros(T+1)
        gfunc[1:] = (numpy.arange(1, T+1, 1)
                     * numpy.log(numpy.arange(1, T+1, 1)))
        return numpy.vectorize(lambda t: gfunc[t])
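
    #  Illustrative usage: the returned vectorized function evaluates
    #  p*log(p) on integer counts, so the entropy (in nats) of a histogram
    #  `hist` over T samples is H = (-plogp(hist).sum() + plogp(T)) / T,
    #  exactly as used in the 'binning' branch of mutual_information:
    #
    #      >>> plogp = CouplingAnalysis.create_plogp(100)
    #      >>> hist = numpy.array([50, 50])
    #      >>> (-plogp(hist).sum() + plogp(100)) / 100.  # -> log(2) ~ 0.6931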