Source code for piblin_jax.data.datasets.distribution

"""
Distribution dataset for continuous probability density functions.

Used for molecular weight distributions, continuous PDFs, and other
distribution data where the probability density is a continuous function.
"""

from typing import Any

import numpy as np

from piblin_jax.backend import jnp, to_numpy

from .base import Dataset


class Distribution(Dataset):
    """
    Dataset pairing a sampled variable with its probability density.

    Intended for continuous probability density functions, for example:

    - Molecular weight distributions (GPC/SEC)
    - Continuous particle size distributions
    - Statistical distributions
    - Any other continuous probability density data

    Parameters
    ----------
    variable_data : array_like
        Values of the variable (e.g., molecular weight, particle size).
        Documented as a 1D array; dimensionality itself is not checked.
    probability_density : array_like
        Probability density evaluated at each entry of ``variable_data``.
        Must have the same shape as ``variable_data``.
    conditions : dict[str, Any] | None, optional
        Experimental conditions, forwarded to the ``Dataset`` initializer.
    details : dict[str, Any] | None, optional
        Additional metadata, forwarded to the ``Dataset`` initializer.

    Attributes
    ----------
    variable_data : np.ndarray
        The variable values, converted with ``to_numpy`` on access.
    probability_density : np.ndarray
        The density values, converted with ``to_numpy`` on access.

    Raises
    ------
    ValueError
        If ``variable_data`` and ``probability_density`` differ in shape.

    Examples
    --------
    >>> import numpy as np
    >>> from piblin_jax.data.datasets import Distribution

    >>> # Molecular weight distribution from GPC (left unnormalized here)
    >>> molecular_weight = np.linspace(1000, 100000, 500)
    >>> density = np.exp(-((molecular_weight - 50000) ** 2) / (2 * 10000 ** 2))
    >>> mwd = Distribution(
    ...     variable_data=molecular_weight,
    ...     probability_density=density,
    ...     conditions={"polymer": "polystyrene", "solvent": "THF"},
    ...     details={"technique": "GPC", "standard": "PS"},
    ... )
    >>> mwd.variable_data.shape
    (500,)
    >>> mwd.probability_density.shape
    (500,)

    >>> # Standard normal density, normalized analytically
    >>> x = np.linspace(-5, 5, 1000)
    >>> normal_dist = Distribution(
    ...     variable_data=x,
    ...     probability_density=np.exp(-x**2 / 2) / np.sqrt(2 * np.pi),
    ...     details={"distribution": "standard normal"},
    ... )

    Notes
    -----
    The density is conventionally normalized so that its integral over the
    variable range equals 1, but the class performs no normalization and
    no normalization check; only the shapes of the two arrays are compared.

    A Distribution describes a continuous density rather than discrete
    bins. Relative to a general one-dimensional dataset the difference is
    mainly semantic: the dependent values are meant to be read as a
    probability density.
    """

    def __init__(
        self,
        variable_data: Any,
        probability_density: Any,
        conditions: dict[str, Any] | None = None,
        details: dict[str, Any] | None = None,
    ):
        """
        Build a distribution from a variable array and its density array.

        Parameters
        ----------
        variable_data : array_like
            Values of the variable.
        probability_density : array_like
            Density values, one per entry of ``variable_data``.
        conditions : dict[str, Any] | None, optional
            Experimental conditions, passed on to ``Dataset.__init__``.
        details : dict[str, Any] | None, optional
            Additional metadata, passed on to ``Dataset.__init__``.

        Raises
        ------
        ValueError
            If the two arrays do not share the same shape.
        """
        super().__init__(conditions=conditions, details=details)

        # Keep both inputs as backend arrays; NumPy views are produced
        # on demand by the public properties.
        self._variable_data = jnp.asarray(variable_data)
        self._probability_density = jnp.asarray(probability_density)

        # The two arrays must line up element for element.
        variable_shape = self._variable_data.shape
        density_shape = self._probability_density.shape
        if variable_shape == density_shape:
            return

        raise ValueError(
            f"Variable and probability density arrays must have same shape. "
            f"Got variable: {variable_shape}, "
            f"probability_density: {density_shape}"
        )

    @property
    def variable_data(self) -> np.ndarray:
        """
        Variable values as a NumPy array.

        Returns
        -------
        np.ndarray
            The stored variable array (e.g., molecular weight, particle
            size, x-values) after conversion with ``to_numpy``.
        """
        stored_variable = self._variable_data
        return to_numpy(stored_variable)

    @property
    def probability_density(self) -> np.ndarray:
        """
        Probability density values as a NumPy array.

        Returns
        -------
        np.ndarray
            The stored density array after conversion with ``to_numpy``.
            It has the same shape as ``variable_data``.
        """
        stored_density = self._probability_density
        return to_numpy(stored_density)