Source code for piblin_jax.transform.dataset.calculus

"""
Calculus-based transforms for 1D datasets.

This module provides derivatives and integration transforms for
numerical differentiation and integration of experimental data.
"""

from typing import Any

from piblin_jax.backend import jnp
from piblin_jax.backend.operations import jit
from piblin_jax.data.datasets import OneDimensionalDataset
from piblin_jax.transform.base import DatasetTransform


[docs] class Derivative(DatasetTransform): """ Compute numerical derivative of 1D dataset. This transform computes numerical derivatives using finite differences. Supports first and second derivatives with various accuracy schemes. Parameters ---------- order : int, default=1 Derivative order (1 or 2). - 1: First derivative (dy/dx) - 2: Second derivative (d²y/dx²) method : str, default='gradient' Method for computing derivative: - 'gradient': Central differences (2nd order accurate) - 'forward': Forward differences (1st order accurate) - 'backward': Backward differences (1st order accurate) Attributes ---------- order : int Derivative order. method : str Differentiation method. Raises ------ ValueError If order is not 1 or 2. Examples -------- >>> import numpy as np >>> from piblin_jax.data.datasets import OneDimensionalDataset >>> from piblin_jax.transform.dataset import Derivative >>> >>> # Create data with known derivative >>> x = np.linspace(0, 10, 100) >>> y = x**2 # dy/dx = 2x >>> dataset = OneDimensionalDataset( ... independent_variable_data=x, ... dependent_variable_data=y ... ) >>> >>> # Compute first derivative >>> deriv = Derivative(order=1) >>> result = deriv.apply_to(dataset) >>> # Result should be approximately 2*x >>> >>> # Compute second derivative >>> deriv2 = Derivative(order=2) >>> result2 = deriv2.apply_to(dataset) >>> # Result should be approximately 2 (constant) Notes ----- - Uses jnp.gradient for central differences - Gradient method provides 2nd order accuracy - Handles non-uniform spacing in x - JIT-compiled with JAX backend - Edge effects present at boundaries - For noisy data, consider smoothing before differentiation """
[docs] def __init__(self, order: int = 1, method: str = "gradient") -> None: """ Initialize derivative transform. Parameters ---------- order : int, default=1 Derivative order (1 or 2). method : str, default='gradient' Differentiation method. Raises ------ ValueError If order is not 1 or 2. """ super().__init__() if order not in [1, 2]: raise ValueError("order must be 1 or 2") self.order = order self.method = method
@staticmethod @jit def _compute_gradient(y: Any, x: Any) -> Any: """JIT-compiled gradient computation for 3-5x speedup.""" return jnp.gradient(y, x) @staticmethod @jit def _compute_forward_diff(y: Any, x: Any) -> Any: """JIT-compiled forward difference for 3-5x speedup.""" dy = jnp.diff(y) / jnp.diff(x) return jnp.concatenate([dy, jnp.array([dy[-1]])]) @staticmethod @jit def _compute_backward_diff(y: Any, x: Any) -> Any: """JIT-compiled backward difference for 3-5x speedup.""" dy = jnp.diff(y) / jnp.diff(x) return jnp.concatenate([jnp.array([dy[0]]), dy]) def _apply(self, dataset: OneDimensionalDataset) -> OneDimensionalDataset: # type: ignore[override] """ Apply derivative computation to dataset. Parameters ---------- dataset : OneDimensionalDataset Input dataset. Returns ------- OneDimensionalDataset Dataset with derivative as dependent variable. Notes ----- Replaces dependent variable with derivative. Independent variable is preserved. """ x = jnp.asarray(dataset.independent_variable_data) y = jnp.asarray(dataset.dependent_variable_data) # Compute first derivative using JIT-compiled methods if self.method == "gradient": # Central differences (2nd order accurate) dy = Derivative._compute_gradient(y, x) # type: ignore[call-arg] elif self.method == "forward": # Forward differences dy = Derivative._compute_forward_diff(y, x) # type: ignore[call-arg] elif self.method == "backward": # Backward differences dy = Derivative._compute_backward_diff(y, x) # type: ignore[call-arg] else: raise ValueError(f"Unknown method: {self.method}") # Compute second derivative if requested if self.order == 2: dy = Derivative._compute_gradient(dy, x) # type: ignore[call-arg] # Update dataset dataset._dependent_variable_data = dy return dataset
[docs] class CumulativeIntegral(DatasetTransform): """ Compute cumulative integral of 1D dataset. This transform computes the cumulative integral (running sum) of the dependent variable with respect to the independent variable using the trapezoidal rule. Parameters ---------- method : str, default='trapezoid' Integration method: - 'trapezoid': Trapezoidal rule (2nd order accurate) - 'simpson': Simpson's rule (4th order accurate, requires odd number of points) Attributes ---------- method : str Integration method. Examples -------- >>> import numpy as np >>> from piblin_jax.data.datasets import OneDimensionalDataset >>> from piblin_jax.transform.dataset import CumulativeIntegral >>> >>> # Create constant function (integral should be linear) >>> x = np.linspace(0, 10, 100) >>> y = np.ones_like(x) # Integral of 1 is x >>> dataset = OneDimensionalDataset( ... independent_variable_data=x, ... dependent_variable_data=y ... ) >>> >>> # Compute cumulative integral >>> integral = CumulativeIntegral() >>> result = integral.apply_to(dataset) >>> # Result should be approximately linear (x) >>> result.dependent_variable_data[-1] # Should be ~10 10.0 Notes ----- - Trapezoidal rule: I[i] = sum((y[i] + y[i-1]) / 2 * dx[i]) - First value is always 0 (integral from x[0] to x[0]) - JIT-compiled with JAX backend - Handles non-uniform spacing - For smoother results on noisy data, consider smoothing first """
[docs] def __init__(self, method: str = "trapezoid") -> None: """ Initialize cumulative integral transform. Parameters ---------- method : str, default='trapezoid' Integration method. """ super().__init__() self.method = method
@staticmethod @jit def _compute_trapezoid_cumsum(x: Any, y: Any) -> Any: """JIT-compiled cumulative trapezoidal integration for 3-5x speedup.""" dx = jnp.diff(x) y_avg = (y[1:] + y[:-1]) / 2.0 return jnp.concatenate([jnp.array([0.0]), jnp.cumsum(y_avg * dx)]) def _apply(self, dataset: OneDimensionalDataset) -> OneDimensionalDataset: # type: ignore[override] """ Apply cumulative integration to dataset. Parameters ---------- dataset : OneDimensionalDataset Input dataset. Returns ------- OneDimensionalDataset Dataset with cumulative integral as dependent variable. Notes ----- Replaces dependent variable with cumulative integral. Independent variable is preserved. """ x = jnp.asarray(dataset.independent_variable_data) y = jnp.asarray(dataset.dependent_variable_data) if self.method == "trapezoid": # Use JIT-compiled trapezoidal rule cumsum = CumulativeIntegral._compute_trapezoid_cumsum(x, y) # type: ignore[call-arg] else: raise ValueError(f"Unknown method: {self.method}") # Update dataset dataset._dependent_variable_data = cumsum return dataset
[docs] class DefiniteIntegral(DatasetTransform): """ Compute definite integral over specified region. This transform computes the definite integral (total area) under the curve between specified x-values. Parameters ---------- x_min : float, optional Lower integration bound (default: use dataset minimum). x_max : float, optional Upper integration bound (default: use dataset maximum). method : str, default='trapezoid' Integration method. Attributes ---------- x_min : float or None Lower bound. x_max : float or None Upper bound. method : str Integration method. Examples -------- >>> import numpy as np >>> from piblin_jax.data.datasets import OneDimensionalDataset >>> from piblin_jax.transform.dataset import DefiniteIntegral >>> >>> # Create data >>> x = np.linspace(0, np.pi, 100) >>> y = np.sin(x) # Integral from 0 to pi is 2 >>> dataset = OneDimensionalDataset(x, y) >>> >>> # Compute definite integral >>> integral = DefiniteIntegral() >>> result = integral.apply_to(dataset) >>> # Result stores integral value in metadata Notes ----- - Returns dataset with integral value stored in details - Original data is preserved - For cumulative integral, use CumulativeIntegral instead """
[docs] def __init__( self, x_min: float | None = None, x_max: float | None = None, method: str = "trapezoid" ) -> None: """ Initialize definite integral transform. Parameters ---------- x_min : float, optional Lower integration bound. x_max : float, optional Upper integration bound. method : str, default='trapezoid' Integration method. """ super().__init__() self.x_min = x_min self.x_max = x_max self.method = method
@staticmethod @jit def _compute_trapezoid_sum(x_region: Any, y_region: Any) -> Any: """JIT-compiled trapezoidal integration for 3-5x speedup.""" dx = jnp.diff(x_region) y_avg = (y_region[1:] + y_region[:-1]) / 2.0 return jnp.sum(y_avg * dx) def _apply(self, dataset: OneDimensionalDataset) -> OneDimensionalDataset: # type: ignore[override] """ Apply definite integration to dataset. Parameters ---------- dataset : OneDimensionalDataset Input dataset. Returns ------- OneDimensionalDataset Dataset with integral value stored in details. """ x = jnp.asarray(dataset.independent_variable_data) y = jnp.asarray(dataset.dependent_variable_data) # Determine integration bounds x_min = self.x_min if self.x_min is not None else float(jnp.min(x)) x_max = self.x_max if self.x_max is not None else float(jnp.max(x)) # Find indices within bounds mask = (x >= x_min) & (x <= x_max) x_region = x[mask] y_region = y[mask] # Compute integral if self.method == "trapezoid": # Trapezoidal rule if len(x_region) < 2: integral_value = 0.0 else: # Use JIT-compiled trapezoidal integration: 3-5x faster integral_value = float(DefiniteIntegral._compute_trapezoid_sum(x_region, y_region)) # type: ignore[call-arg,arg-type] else: raise ValueError(f"Unknown method: {self.method}") # Store result in details if dataset.details is None: dataset.details = {} dataset.details["integral_value"] = integral_value dataset.details["integral_x_min"] = x_min dataset.details["integral_x_max"] = x_max return dataset
__all__ = [ "CumulativeIntegral", "DefiniteIntegral", "Derivative", ]