"""
Calculus-based transforms for 1D datasets.
This module provides derivatives and integration transforms for
numerical differentiation and integration of experimental data.
"""
from typing import Any
from piblin_jax.backend import jnp
from piblin_jax.backend.operations import jit
from piblin_jax.data.datasets import OneDimensionalDataset
from piblin_jax.transform.base import DatasetTransform
[docs]
class Derivative(DatasetTransform):
"""
Compute numerical derivative of 1D dataset.
This transform computes numerical derivatives using finite differences.
Supports first and second derivatives with various accuracy schemes.
Parameters
----------
order : int, default=1
Derivative order (1 or 2).
- 1: First derivative (dy/dx)
- 2: Second derivative (d²y/dx²)
method : str, default='gradient'
Method for computing derivative:
- 'gradient': Central differences (2nd order accurate)
- 'forward': Forward differences (1st order accurate)
- 'backward': Backward differences (1st order accurate)
Attributes
----------
order : int
Derivative order.
method : str
Differentiation method.
Raises
------
ValueError
If order is not 1 or 2.
Examples
--------
>>> import numpy as np
>>> from piblin_jax.data.datasets import OneDimensionalDataset
>>> from piblin_jax.transform.dataset import Derivative
>>>
>>> # Create data with known derivative
>>> x = np.linspace(0, 10, 100)
>>> y = x**2 # dy/dx = 2x
>>> dataset = OneDimensionalDataset(
... independent_variable_data=x,
... dependent_variable_data=y
... )
>>>
>>> # Compute first derivative
>>> deriv = Derivative(order=1)
>>> result = deriv.apply_to(dataset)
>>> # Result should be approximately 2*x
>>>
>>> # Compute second derivative
>>> deriv2 = Derivative(order=2)
>>> result2 = deriv2.apply_to(dataset)
>>> # Result should be approximately 2 (constant)
Notes
-----
- Uses jnp.gradient for central differences
- Gradient method provides 2nd order accuracy
- Handles non-uniform spacing in x
- JIT-compiled with JAX backend
- Edge effects present at boundaries
- For noisy data, consider smoothing before differentiation
"""
[docs]
def __init__(self, order: int = 1, method: str = "gradient") -> None:
"""
Initialize derivative transform.
Parameters
----------
order : int, default=1
Derivative order (1 or 2).
method : str, default='gradient'
Differentiation method.
Raises
------
ValueError
If order is not 1 or 2.
"""
super().__init__()
if order not in [1, 2]:
raise ValueError("order must be 1 or 2")
self.order = order
self.method = method
@staticmethod
@jit
def _compute_gradient(y: Any, x: Any) -> Any:
"""JIT-compiled gradient computation for 3-5x speedup."""
return jnp.gradient(y, x)
@staticmethod
@jit
def _compute_forward_diff(y: Any, x: Any) -> Any:
"""JIT-compiled forward difference for 3-5x speedup."""
dy = jnp.diff(y) / jnp.diff(x)
return jnp.concatenate([dy, jnp.array([dy[-1]])])
@staticmethod
@jit
def _compute_backward_diff(y: Any, x: Any) -> Any:
"""JIT-compiled backward difference for 3-5x speedup."""
dy = jnp.diff(y) / jnp.diff(x)
return jnp.concatenate([jnp.array([dy[0]]), dy])
def _apply(self, dataset: OneDimensionalDataset) -> OneDimensionalDataset: # type: ignore[override]
"""
Apply derivative computation to dataset.
Parameters
----------
dataset : OneDimensionalDataset
Input dataset.
Returns
-------
OneDimensionalDataset
Dataset with derivative as dependent variable.
Notes
-----
Replaces dependent variable with derivative.
Independent variable is preserved.
"""
x = jnp.asarray(dataset.independent_variable_data)
y = jnp.asarray(dataset.dependent_variable_data)
# Compute first derivative using JIT-compiled methods
if self.method == "gradient":
# Central differences (2nd order accurate)
dy = Derivative._compute_gradient(y, x) # type: ignore[call-arg]
elif self.method == "forward":
# Forward differences
dy = Derivative._compute_forward_diff(y, x) # type: ignore[call-arg]
elif self.method == "backward":
# Backward differences
dy = Derivative._compute_backward_diff(y, x) # type: ignore[call-arg]
else:
raise ValueError(f"Unknown method: {self.method}")
# Compute second derivative if requested
if self.order == 2:
dy = Derivative._compute_gradient(dy, x) # type: ignore[call-arg]
# Update dataset
dataset._dependent_variable_data = dy
return dataset
[docs]
class CumulativeIntegral(DatasetTransform):
"""
Compute cumulative integral of 1D dataset.
This transform computes the cumulative integral (running sum) of the
dependent variable with respect to the independent variable using
the trapezoidal rule.
Parameters
----------
method : str, default='trapezoid'
Integration method:
- 'trapezoid': Trapezoidal rule (2nd order accurate)
- 'simpson': Simpson's rule (4th order accurate, requires odd number of points)
Attributes
----------
method : str
Integration method.
Examples
--------
>>> import numpy as np
>>> from piblin_jax.data.datasets import OneDimensionalDataset
>>> from piblin_jax.transform.dataset import CumulativeIntegral
>>>
>>> # Create constant function (integral should be linear)
>>> x = np.linspace(0, 10, 100)
>>> y = np.ones_like(x) # Integral of 1 is x
>>> dataset = OneDimensionalDataset(
... independent_variable_data=x,
... dependent_variable_data=y
... )
>>>
>>> # Compute cumulative integral
>>> integral = CumulativeIntegral()
>>> result = integral.apply_to(dataset)
>>> # Result should be approximately linear (x)
>>> result.dependent_variable_data[-1] # Should be ~10
10.0
Notes
-----
- Trapezoidal rule: I[i] = sum((y[i] + y[i-1]) / 2 * dx[i])
- First value is always 0 (integral from x[0] to x[0])
- JIT-compiled with JAX backend
- Handles non-uniform spacing
- For smoother results on noisy data, consider smoothing first
"""
[docs]
def __init__(self, method: str = "trapezoid") -> None:
"""
Initialize cumulative integral transform.
Parameters
----------
method : str, default='trapezoid'
Integration method.
"""
super().__init__()
self.method = method
@staticmethod
@jit
def _compute_trapezoid_cumsum(x: Any, y: Any) -> Any:
"""JIT-compiled cumulative trapezoidal integration for 3-5x speedup."""
dx = jnp.diff(x)
y_avg = (y[1:] + y[:-1]) / 2.0
return jnp.concatenate([jnp.array([0.0]), jnp.cumsum(y_avg * dx)])
def _apply(self, dataset: OneDimensionalDataset) -> OneDimensionalDataset: # type: ignore[override]
"""
Apply cumulative integration to dataset.
Parameters
----------
dataset : OneDimensionalDataset
Input dataset.
Returns
-------
OneDimensionalDataset
Dataset with cumulative integral as dependent variable.
Notes
-----
Replaces dependent variable with cumulative integral.
Independent variable is preserved.
"""
x = jnp.asarray(dataset.independent_variable_data)
y = jnp.asarray(dataset.dependent_variable_data)
if self.method == "trapezoid":
# Use JIT-compiled trapezoidal rule
cumsum = CumulativeIntegral._compute_trapezoid_cumsum(x, y) # type: ignore[call-arg]
else:
raise ValueError(f"Unknown method: {self.method}")
# Update dataset
dataset._dependent_variable_data = cumsum
return dataset
[docs]
class DefiniteIntegral(DatasetTransform):
"""
Compute definite integral over specified region.
This transform computes the definite integral (total area) under
the curve between specified x-values.
Parameters
----------
x_min : float, optional
Lower integration bound (default: use dataset minimum).
x_max : float, optional
Upper integration bound (default: use dataset maximum).
method : str, default='trapezoid'
Integration method.
Attributes
----------
x_min : float or None
Lower bound.
x_max : float or None
Upper bound.
method : str
Integration method.
Examples
--------
>>> import numpy as np
>>> from piblin_jax.data.datasets import OneDimensionalDataset
>>> from piblin_jax.transform.dataset import DefiniteIntegral
>>>
>>> # Create data
>>> x = np.linspace(0, np.pi, 100)
>>> y = np.sin(x) # Integral from 0 to pi is 2
>>> dataset = OneDimensionalDataset(x, y)
>>>
>>> # Compute definite integral
>>> integral = DefiniteIntegral()
>>> result = integral.apply_to(dataset)
>>> # Result stores integral value in metadata
Notes
-----
- Returns dataset with integral value stored in details
- Original data is preserved
- For cumulative integral, use CumulativeIntegral instead
"""
[docs]
def __init__(
self, x_min: float | None = None, x_max: float | None = None, method: str = "trapezoid"
) -> None:
"""
Initialize definite integral transform.
Parameters
----------
x_min : float, optional
Lower integration bound.
x_max : float, optional
Upper integration bound.
method : str, default='trapezoid'
Integration method.
"""
super().__init__()
self.x_min = x_min
self.x_max = x_max
self.method = method
@staticmethod
@jit
def _compute_trapezoid_sum(x_region: Any, y_region: Any) -> Any:
"""JIT-compiled trapezoidal integration for 3-5x speedup."""
dx = jnp.diff(x_region)
y_avg = (y_region[1:] + y_region[:-1]) / 2.0
return jnp.sum(y_avg * dx)
def _apply(self, dataset: OneDimensionalDataset) -> OneDimensionalDataset: # type: ignore[override]
"""
Apply definite integration to dataset.
Parameters
----------
dataset : OneDimensionalDataset
Input dataset.
Returns
-------
OneDimensionalDataset
Dataset with integral value stored in details.
"""
x = jnp.asarray(dataset.independent_variable_data)
y = jnp.asarray(dataset.dependent_variable_data)
# Determine integration bounds
x_min = self.x_min if self.x_min is not None else float(jnp.min(x))
x_max = self.x_max if self.x_max is not None else float(jnp.max(x))
# Find indices within bounds
mask = (x >= x_min) & (x <= x_max)
x_region = x[mask]
y_region = y[mask]
# Compute integral
if self.method == "trapezoid":
# Trapezoidal rule
if len(x_region) < 2:
integral_value = 0.0
else:
# Use JIT-compiled trapezoidal integration: 3-5x faster
integral_value = float(DefiniteIntegral._compute_trapezoid_sum(x_region, y_region)) # type: ignore[call-arg,arg-type]
else:
raise ValueError(f"Unknown method: {self.method}")
# Store result in details
if dataset.details is None:
dataset.details = {}
dataset.details["integral_value"] = integral_value
dataset.details["integral_x_min"] = x_min
dataset.details["integral_x_max"] = x_max
return dataset
__all__ = [
"CumulativeIntegral",
"DefiniteIntegral",
"Derivative",
]