"""
Composite one-dimensional dataset with multiple dependent variables.
Used for multi-channel instrument data where multiple signals share the
same independent variable (e.g., time, wavelength).
"""
from typing import Any
import numpy as np
from piblin_jax.backend import jnp, to_numpy
from .base import Dataset
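# NOTE(review): a stray "[docs]" link label stood here -- it appears to be residue from a rendered HTML source view, not code.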
class OneDimensionalCompositeDataset(Dataset):
    """
    Composite 1D dataset with shared independent variable and multiple dependents.

    This dataset type represents multi-channel or multi-detector data where
    multiple signals share the same independent variable:

    - Multi-detector chromatography (UV, fluorescence, conductivity)
    - Multi-channel spectroscopy
    - Multi-sensor time series
    - Parallel measurements with shared axis

    Parameters
    ----------
    independent_variable_data : array_like
        1D array of independent variable (time, wavelength, etc.) shared by
        all channels.
    dependent_variable_data_list : list of array_like
        List of 1D arrays, each representing a different channel/detector.
        All must have the same length as independent_variable_data.
    conditions : dict[str, Any] | None, optional
        Experimental conditions.
    details : dict[str, Any] | None, optional
        Additional metadata.

    Attributes
    ----------
    independent_variable_data : np.ndarray
        Shared independent variable as NumPy array.
    dependent_variable_data_list : list of np.ndarray
        List of dependent variables as NumPy arrays.
    conditions : dict[str, Any]
        Experimental conditions.
    details : dict[str, Any]
        Additional metadata.

    Raises
    ------
    ValueError
        If dependent_variable_data_list is empty or None, if the independent
        variable or any channel is zero-dimensional (a scalar), or if any
        channel has different length than independent_variable_data.

    Examples
    --------
    >>> import numpy as np
    >>> from piblin_jax.data.datasets import OneDimensionalCompositeDataset
    >>> # Multi-detector HPLC data
    >>> time = np.linspace(0, 20, 2000)  # minutes
    >>> uv_254 = np.sin(time) + 0.1 * np.random.randn(2000)
    >>> uv_280 = np.cos(time) + 0.1 * np.random.randn(2000)
    >>> fluorescence = np.sin(2 * time) + 0.05 * np.random.randn(2000)
    >>> hplc = OneDimensionalCompositeDataset(
    ...     independent_variable_data=time,
    ...     dependent_variable_data_list=[uv_254, uv_280, fluorescence],
    ...     conditions={"mobile_phase": "ACN/H2O 60:40", "flow_rate": 1.0},
    ...     details={
    ...         "channels": ["UV 254nm", "UV 280nm", "Fluorescence"],
    ...         "instrument": "HPLC-1"
    ...     }
    ... )
    >>> hplc.independent_variable_data.shape
    (2000,)
    >>> len(hplc.dependent_variable_data_list)
    3
    >>> hplc.dependent_variable_data_list[0].shape
    (2000,)

    >>> # Multi-channel oscilloscope data
    >>> t = np.linspace(0, 1, 10000)
    >>> ch1 = np.sin(2 * np.pi * 5 * t)
    >>> ch2 = np.sin(2 * np.pi * 10 * t)
    >>> ch3 = np.sin(2 * np.pi * 15 * t)
    >>> ch4 = np.sin(2 * np.pi * 20 * t)
    >>> scope_data = OneDimensionalCompositeDataset(
    ...     independent_variable_data=t,
    ...     dependent_variable_data_list=[ch1, ch2, ch3, ch4],
    ...     conditions={"sampling_rate": 10000},
    ...     details={"instrument": "oscilloscope", "channels": 4}
    ... )

    Notes
    -----
    This dataset type is useful when multiple measurements are made simultaneously
    along the same independent axis. Each channel is stored as a separate
    array in the list, allowing different processing or analysis on each channel
    while maintaining their shared relationship through the common independent
    variable.

    The internal storage uses backend arrays (JAX when available) and converts
    to NumPy at the property boundaries.
    """

    def __init__(
        self,
        independent_variable_data: Any,
        dependent_variable_data_list: list[Any],
        conditions: dict[str, Any] | None = None,
        details: dict[str, Any] | None = None,
    ) -> None:
        """
        Initialize composite one-dimensional dataset.

        Parameters
        ----------
        independent_variable_data : array_like
            1D array of shared independent variable.
        dependent_variable_data_list : list of array_like
            List of 1D arrays for each channel.
        conditions : dict[str, Any] | None, optional
            Experimental conditions.
        details : dict[str, Any] | None, optional
            Additional metadata.

        Raises
        ------
        ValueError
            If the list is empty or None, if the independent variable or any
            channel is zero-dimensional, or if any channel length doesn't
            match the independent variable.
        """
        super().__init__(conditions=conditions, details=details)

        # Validation: must have at least one dependent variable.
        # An explicit None/len test is used instead of truthiness: `not x` on a
        # multi-element ndarray raises an unrelated "ambiguous truth value"
        # error, which would reject channels supplied as rows of a 2D array.
        if dependent_variable_data_list is None or len(dependent_variable_data_list) == 0:
            raise ValueError(
                "OneDimensionalCompositeDataset requires at least one dependent variable. "
                "Got empty list."
            )

        # Convert independent variable to backend array.
        independent = jnp.asarray(independent_variable_data)
        # A scalar has an empty shape tuple, so `.shape[0]` below would fail
        # with IndexError; report it as the documented ValueError instead.
        if len(independent.shape) == 0:
            raise ValueError(
                "Independent variable must be a 1D array, but got a "
                "zero-dimensional (scalar) value."
            )
        expected_length = independent.shape[0]

        # Convert all dependent variables to backend arrays and validate.
        # Only scalars are newly rejected; higher-rank inputs are still
        # compared on their leading axis exactly as before.
        channels = []
        for i, dep_data in enumerate(dependent_variable_data_list):
            dep_array = jnp.asarray(dep_data)
            if len(dep_array.shape) == 0:
                raise ValueError(
                    f"All dependent variables must be 1D arrays, but "
                    f"dependent variable at index {i} is zero-dimensional (scalar)."
                )
            # Validation: each channel must match independent variable length.
            if dep_array.shape[0] != expected_length:
                raise ValueError(
                    f"All dependent variables must have same length as independent variable. "
                    f"Independent variable has length {expected_length}, but "
                    f"dependent variable at index {i} has length {dep_array.shape[0]}"
                )
            channels.append(dep_array)

        # Store state only once every input has passed validation.
        self._independent_variable_data = independent
        self._dependent_variable_data_list = channels

    @property
    def independent_variable_data(self) -> np.ndarray:
        """
        Get shared independent variable as NumPy array.

        The stored backend array is converted with ``to_numpy`` on each access.

        Returns
        -------
        np.ndarray
            1D NumPy array of independent variable shared by all channels.

        Examples
        --------
        >>> dataset.independent_variable_data
        array([0., 0.01, 0.02, ..., 19.98, 19.99, 20.])
        """
        return to_numpy(self._independent_variable_data)

    @property
    def dependent_variable_data_list(self) -> list[np.ndarray]:
        """
        Get list of dependent variables as NumPy arrays.

        A new list is built on every access, with each stored backend array
        converted through ``to_numpy``.

        Returns
        -------
        list of np.ndarray
            List of 1D NumPy arrays, one for each channel/detector.

        Examples
        --------
        >>> len(dataset.dependent_variable_data_list)
        3
        >>> dataset.dependent_variable_data_list[0]  # First channel
        array([0.123, 0.145, ..., 0.234])
        >>> dataset.dependent_variable_data_list[1]  # Second channel
        array([0.456, 0.478, ..., 0.567])
        >>> # Process each channel
        >>> for i, channel in enumerate(dataset.dependent_variable_data_list):
        ...     print(f"Channel {i}: max = {channel.max():.3f}")
        Channel 0: max = 1.234
        Channel 1: max = 1.567
        Channel 2: max = 0.987
        """
        # Convert all backend arrays to NumPy.
        return [to_numpy(dep) for dep in self._dependent_variable_data_list]