Source code for scaleinvariance.analysis.haar_fluctuation

import numpy as np
import warnings
from scipy.signal import convolve

from ..utils import process_lags, estimate_hurst_from_scaling



[docs]
def haar_fluctuation_analysis(data, order=1, max_sep=None, axis=0, lags='powers of 1.2', nan_behavior='raise'):
    """
    Calculate the mean absolute Haar fluctuation for regularly spaced scalar data along a given axis.

    Uses scipy.signal.convolve for efficient computation.
    
    Parameters
    -----------
    data : array-like
        N-dimensional scalar data array.
    order : int, optional
        Order of the fluctuation (default: 1).
    max_sep : int, optional
        Maximum separation/lag to compute. If None, defaults to (size along axis) - 1.
    axis : int, optional
        Axis along which to compute the Haar fluctuation (default: 0).
    lags : str, list, or np.ndarray, optional
        Option for selecting lag values. Can be:
          - A list or array of lags.
          - 'all': all integer lags from 1 to max_sep.
          - 'powers of X': lags as integers of powers of X, where X is any number (e.g., 'powers of 2', 'powers of 1.2').
    nan_behavior : str, optional
        Behavior when encountering NaN values in the data. Options are:
            - 'raise': Raise an error if NaN values are found (default).
            - 'ignore': Ignore NaN values in the calculation.
    
    Returns
    --------
    tuple (np.ndarray, np.ndarray)
        lags : 1-D array of lag values.
        haar : 1-D array of mean absolute haar fluctuation values corresponding to each lag.
    """
    if nan_behavior not in ['ignore','raise']: 
        raise ValueError("nan_behavior must be 'raise' or 'ignore'")
    
    data = np.asarray(data)
    if np.issubdtype(data.dtype, np.integer):
        data = data.astype(np.float32, copy=False)
    if (np.count_nonzero(np.isnan(data)) > 0) and (nan_behavior == 'raise'):
        raise ValueError("Input data contains NaN values; change nan_behavior to 'ignore' or check inputs.")
    
    # Validate axis
    if axis < 0: 
        axis += data.ndim
    if axis < 0 or axis >= data.ndim:
        raise ValueError("Axis is out of bounds for data dimensions.")
    if max_sep is not None and (not isinstance(max_sep, int) or max_sep <= 0):
        raise ValueError("'max_sep' must be a positive integer or None.")
    
    L = data.shape[axis]
    if max_sep is None:
        max_sep = L - 1
    else:
        max_sep = min(max_sep, L - 1)
    
    # Process lag options
    lags = process_lags(lags, max_sep, even_only=True)

    # Determine convolution method
    if np.any(np.isnan(data)):
        conv_method = 'direct'
    else:
        conv_method = 'auto'

    haar_flucs = []

    for i, lag in enumerate(lags):
        # If lag is odd or too large, assign NaN and skip processing.
        if lag % 2 != 0 or lag > max_sep:
            haar_flucs.append(np.nan)
            continue

        # Construct Haar kernel
        kernel = np.ones(lag, dtype=np.float64) / (lag/2)
        kernel[:lag//2] *= -1

        # Embed the 1D kernel into an n-D kernel with its values along the specified axis.
        kernel_shape = [1] * data.ndim
        kernel_shape[axis] = lag
        kernel = kernel.reshape(kernel_shape)

        # Convolution using scipy
        abs_conv = np.abs(convolve(data, kernel, mode='valid', method=conv_method))

        if (abs_conv.size == 0) or np.all(np.isnan(abs_conv)):
            haar_flucs.append(np.nan)
            continue
        
        if order != 1:
            abs_conv = abs_conv**order

        mean_absolute_haar_fluctuation = np.nanmean(abs_conv)
        haar_flucs.append(mean_absolute_haar_fluctuation)

    return np.array(lags, dtype=np.int64), np.array(haar_flucs, dtype=np.float64)




[docs]
def haar_fluctuation_hurst(data, min_sep=None, max_sep=None, axis=0, return_fit=False):
    """
    Calculate the Hurst exponent using Haar fluctuation analysis.
    
    For a self-similar process with Hurst exponent H, the Haar fluctuation scales as:
    F_H(r) ~ r^H
    
    This function estimates H by performing a linear regression on log(F_H(r)) vs log(r).
    
    Parameters
    -----------
    data : array-like
        N-dimensional scalar data array.
    min_sep : int, optional
        Minimum separation/lag to include in the fit.
    max_sep : int, optional
        Maximum separation/lag to include in the fit. Defaults to array_size - 1
        (a warning is emitted for small arrays).
    axis : int, optional
        Axis along which to compute the Haar fluctuation (default: 0).
    return_fit : bool, optional
        If True, return the lags, fluctuation values, and fitted line.
        
    Returns
    --------
    If return_fit is False:
        H : float
            Estimated Hurst exponent.
        uncertainty : float
            Standard error of the Hurst exponent estimate.
    If return_fit is True:
        H : float 
            Estimated Hurst exponent.
        uncertainty : float
            Standard error of the Hurst exponent estimate.
        lags : np.ndarray
            The lags used in the calculation.
        haar_values : np.ndarray
            The Haar fluctuation values.
        fit_line : np.ndarray
            The fitted line used to estimate H.
    """
    data = np.asarray(data)
    array_size = data.shape[axis]
    
    # Check minimum array size for reliable Hurst estimation
    if array_size < 16:
        raise ValueError(f"Array size along axis {axis} is {array_size}, but minimum 16 points required for Hurst estimation")
    
    # Set default separations
    if max_sep is None:
        if array_size <= 512:
            max_sep = array_size - 1
            warnings.warn(f"Array size ({array_size}) is small. Using max_sep={max_sep}. "
                         "Consider using larger arrays for more reliable Hurst estimation.",
                         UserWarning)
        else:
            max_sep = array_size - 1
    
    if min_sep is None:
        if array_size <= 512:
            min_sep = 1
        else:
            min_sep = 4
    
    # Calculate Haar fluctuations
    lags, haar_values = haar_fluctuation_analysis(data, order=1, max_sep=max_sep,
                                                 axis=axis, lags='powers of 1.2')
    
    # Use common Hurst estimation utility
    return estimate_hurst_from_scaling(lags, haar_values, min_sep, max_sep, return_fit)