import numpy as np
import warnings
from scipy.signal import convolve
from ..utils import process_lags, estimate_hurst_from_scaling
[docs]
def haar_fluctuation_analysis(data, order=1, max_sep=None, axis=0, lags='powers of 1.2', nan_behavior='raise'):
"""
Calculate the mean absolute Haar fluctuation for regularly spaced scalar data along a given axis.
Uses scipy.signal.convolve for efficient computation.
Parameters
-----------
data : array-like
N-dimensional scalar data array.
order : int, optional
Order of the fluctuation (default: 1).
max_sep : int, optional
Maximum separation/lag to compute. If None, defaults to (size along axis) - 1.
axis : int, optional
Axis along which to compute the Haar fluctuation (default: 0).
lags : str, list, or np.ndarray, optional
Option for selecting lag values. Can be:
- A list or array of lags.
- 'all': all integer lags from 1 to max_sep.
- 'powers of X': lags as integers of powers of X, where X is any number (e.g., 'powers of 2', 'powers of 1.2').
nan_behavior : str, optional
Behavior when encountering NaN values in the data. Options are:
- 'raise': Raise an error if NaN values are found (default).
- 'ignore': Ignore NaN values in the calculation.
Returns
--------
tuple (np.ndarray, np.ndarray)
lags : 1-D array of lag values.
haar : 1-D array of mean absolute haar fluctuation values corresponding to each lag.
"""
if nan_behavior not in ['ignore','raise']:
raise ValueError("nan_behavior must be 'raise' or 'ignore'")
data = np.asarray(data)
if np.issubdtype(data.dtype, np.integer):
data = data.astype(np.float32, copy=False)
if (np.count_nonzero(np.isnan(data)) > 0) and (nan_behavior == 'raise'):
raise ValueError("Input data contains NaN values; change nan_behavior to 'ignore' or check inputs.")
# Validate axis
if axis < 0:
axis += data.ndim
if axis < 0 or axis >= data.ndim:
raise ValueError("Axis is out of bounds for data dimensions.")
if max_sep is not None and (not isinstance(max_sep, int) or max_sep <= 0):
raise ValueError("'max_sep' must be a positive integer or None.")
L = data.shape[axis]
if max_sep is None:
max_sep = L - 1
else:
max_sep = min(max_sep, L - 1)
# Process lag options
lags = process_lags(lags, max_sep, even_only=True)
# Determine convolution method
if np.any(np.isnan(data)):
conv_method = 'direct'
else:
conv_method = 'auto'
haar_flucs = []
for i, lag in enumerate(lags):
# If lag is odd or too large, assign NaN and skip processing.
if lag % 2 != 0 or lag > max_sep:
haar_flucs.append(np.nan)
continue
# Construct Haar kernel
kernel = np.ones(lag, dtype=np.float64) / (lag/2)
kernel[:lag//2] *= -1
# Embed the 1D kernel into an n-D kernel with its values along the specified axis.
kernel_shape = [1] * data.ndim
kernel_shape[axis] = lag
kernel = kernel.reshape(kernel_shape)
# Convolution using scipy
abs_conv = np.abs(convolve(data, kernel, mode='valid', method=conv_method))
if (abs_conv.size == 0) or np.all(np.isnan(abs_conv)):
haar_flucs.append(np.nan)
continue
if order != 1:
abs_conv = abs_conv**order
mean_absolute_haar_fluctuation = np.nanmean(abs_conv)
haar_flucs.append(mean_absolute_haar_fluctuation)
return np.array(lags, dtype=np.int64), np.array(haar_flucs, dtype=np.float64)
[docs]
def haar_fluctuation_hurst(data, min_sep=None, max_sep=None, axis=0, return_fit=False):
"""
Calculate the Hurst exponent using Haar fluctuation analysis.
For a self-similar process with Hurst exponent H, the Haar fluctuation scales as:
F_H(r) ~ r^H
This function estimates H by performing a linear regression on log(F_H(r)) vs log(r).
Parameters
-----------
data : array-like
N-dimensional scalar data array.
min_sep : int, optional
Minimum separation/lag to include in the fit.
max_sep : int, optional
Maximum separation/lag to include in the fit. Defaults to array_size - 1
(a warning is emitted for small arrays).
axis : int, optional
Axis along which to compute the Haar fluctuation (default: 0).
return_fit : bool, optional
If True, return the lags, fluctuation values, and fitted line.
Returns
--------
If return_fit is False:
H : float
Estimated Hurst exponent.
uncertainty : float
Standard error of the Hurst exponent estimate.
If return_fit is True:
H : float
Estimated Hurst exponent.
uncertainty : float
Standard error of the Hurst exponent estimate.
lags : np.ndarray
The lags used in the calculation.
haar_values : np.ndarray
The Haar fluctuation values.
fit_line : np.ndarray
The fitted line used to estimate H.
"""
data = np.asarray(data)
array_size = data.shape[axis]
# Check minimum array size for reliable Hurst estimation
if array_size < 16:
raise ValueError(f"Array size along axis {axis} is {array_size}, but minimum 16 points required for Hurst estimation")
# Set default separations
if max_sep is None:
if array_size <= 512:
max_sep = array_size - 1
warnings.warn(f"Array size ({array_size}) is small. Using max_sep={max_sep}. "
"Consider using larger arrays for more reliable Hurst estimation.",
UserWarning)
else:
max_sep = array_size - 1
if min_sep is None:
if array_size <= 512:
min_sep = 1
else:
min_sep = 4
# Calculate Haar fluctuations
lags, haar_values = haar_fluctuation_analysis(data, order=1, max_sep=max_sep,
axis=axis, lags='powers of 1.2')
# Use common Hurst estimation utility
return estimate_hurst_from_scaling(lags, haar_values, min_sep, max_sep, return_fit)