"""This module defines the Dataset class, the central data structure of ixdat
An ixdat Dataset is a collection of references to DataSeries with the metadata required
to combine them, i.e. "build" the combined dataset. It has a number of general methods
to visualize and analyze the combined dataset. Dataset is also the base class for a
number of technique-specific Dataset-derived classes.
"""
from pathlib import Path
import json
import numpy as np
from .db import Saveable, PlaceHolderObject
from .data_series import DataSeries, TimeSeries, ValueSeries
from .projects.samples import Sample
from .projects.lablogs import LabLog
from .exporters.csv_exporter import CSVExporter
from .exceptions import BuildError, SeriesNotFoundError # , TechniqueError
class Measurement(Saveable):
    """The Measurement class

    A Measurement holds a list of DataSeries and, for combined measurements, a list
    of component measurements, together with descriptive attributes such as name,
    technique, metadata, sample and tstamp.
    """

    # Name of the table for measurements.
    # NOTE(review): presumably read by db.Saveable / the backend - confirm there.
    table_name = "measurement"
    # Names of the attributes listed as columns for a measurement.
    column_attrs = {
        "name",
        "technique",
        "metadata",
        "sample_name",
        "tstamp",
    }
    # Presumably {linker table: (linked table, name of the id-list attribute)};
    # TODO confirm against db.Saveable.
    extra_linkers = {
        "measurement_series": ("data_series", "s_ids"),
        "component_measurements": ("measurements", "m_ids"),
    }
    sel_str = None  # the default thing to select on.
    # FIXME: this is here because otherwise MSMeasurement.__init__ overwrites what it
    #     gets set to by ECMeasurement.__init__ in ECMSMeasurement.__init__
def __init__(
    self,
    name,
    technique=None,
    metadata=None,
    s_ids=None,
    series_list=None,
    m_ids=None,
    component_measurements=None,
    reader=None,
    plotter=None,
    exporter=None,
    sample=None,
    lablog=None,
    tstamp=None,
):
    """Set up a measurement from its metadata, series and (optional) components

    Args:
        name (str): The name of the measurement
        technique (str): The measurement technique
        metadata (dict): Free-form measurement metadata. Must be json-compatible.
            An empty dictionary is used if nothing (or something falsy) is given.
        s_ids (list of int): The id's of the measurement's DataSeries, if
            to be loaded (instead of given directly in series_list)
        series_list (list of DataSeries): The measurement's DataSeries
        m_ids (list of int): The id's of the component measurements, if to be
            loaded. None unless this is a combined measurement (typically
            corresponding to more than one file).
        component_measurements (list of Measurements): The measurements of which
            this measurement is a combination
        reader (Reader): The file reader (None unless read from a file)
        plotter (Plotter): The visualization tool for the measurement
        exporter (Exporter): The exporting tool for the measurement
        sample (Sample or str): The sample being measured. A string is passed
            to Sample.load_or_make().
        lablog (LabLog or str): The log entry with e.g. notes taken during the
            measurement. A string is passed to LabLog.load_or_make().
        tstamp (float): The nominal starting time of the measurement, used for
            data selection, visualization, and exporting.
    """
    super().__init__()
    self.name = name
    self.technique = technique
    self.metadata = metadata if metadata else {}
    self.reader = reader
    self._plotter = plotter
    self._exporter = exporter

    # Samples and lab logs may be handed over just by their name.
    self.sample = Sample.load_or_make(sample) if isinstance(sample, str) else sample
    self.lablog = LabLog.load_or_make(lablog) if isinstance(lablog, str) else lablog

    # Anything only known by id is represented by a placeholder until needed.
    self._series_list = fill_object_list(series_list, s_ids, cls=DataSeries)
    self._component_measurements = fill_object_list(
        component_measurements, m_ids, cls=Measurement
    )
    self.tstamp = tstamp

    # defining these methods here gets them the right docstrings :D
    plot_method = self.plotter.plot_measurement
    self.plot_measurement = plot_method
    self.plot = plot_method
    # TODO: ... but we need to think a bit more about how to most elegantly and
    #    dynamically choose plotters (Nice idea from Anna:
    #    https://github.com/ixdat/ixdat/issues/32)
@classmethod
def from_dict(cls, obj_as_dict):
    """Return an object of the measurement class of the right technique

    The dictionary passed in is not modified; a shallow copy is prepared and
    handed to the constructor.

    Args:
        obj_as_dict (dict): The full serialization (rows from table and aux
            tables) of the measurement. obj_as_dict["technique"] specifies the
            technique class to use, from TECHNIQUE_CLASSES. If the technique is
            missing or not recognized, `cls` itself is used.

    Returns:
        An instance of `cls`, or of a subclass of `cls` registered for the
        technique in TECHNIQUE_CLASSES.
    """
    # TODO: see if there isn't a way to put the import at the top of the module.
    #    see: https://github.com/ixdat/ixdat/pull/1#discussion_r546437410
    from .techniques import TECHNIQUE_CLASSES

    # Shallow copy, so that the renaming below does not leak into the caller's dict
    obj_as_dict = dict(obj_as_dict)

    # Certain objects are stored in the Measurement, but only saved as their names.
    # __init__() will get the object from the name, but the argument is
    # called like the object either way. For example __init__() takes an argument
    # called `sample` which can be an ixdat.Sample or a string interpreted as the
    # name of the sample to load. In the database only the sample's name is saved,
    # with the key/column "sample_name". So obj_as_dict["sample_name"] needs to be
    # renamed obj_as_dict["sample"] before obj_as_dict can be passed to __init__.
    # TODO: This is a rather general problem (see, e.g. DataSeries.unit vs
    #    DataSeries.unit_name) and as such should be moved to db.Saveable
    #    see: https://github.com/ixdat/ixdat/pull/5#discussion_r565090372
    objects_saved_as_their_name = [
        "sample",
    ]
    for object_type_str in objects_saved_as_their_name:
        object_name_str = object_type_str + "_name"
        if object_name_str in obj_as_dict:
            obj_as_dict[object_type_str] = obj_as_dict.pop(object_name_str)

    # By default the class from_dict() was called on is used ...
    technique_class = cls
    technique = obj_as_dict.get("technique")
    if technique in TECHNIQUE_CLASSES:
        # ... but from_dict() can be used to initiate any more derived technique,
        # so long as obj_as_dict specifies the technique name!
        candidate = TECHNIQUE_CLASSES[technique]
        # We never want obj_as_dict["technique"] to take us to a *less* specific
        # technique, if the user has been intentional about which class they
        # call `from_dict` from (e.g. via a Reader)!
        if issubclass(candidate, cls):
            technique_class = candidate

    if technique_class is cls:
        return cls(**obj_as_dict)
    # Then its from_dict() might have more than ours:
    return technique_class.from_dict(obj_as_dict)
@classmethod
def read(cls, path_to_file, reader, **kwargs):
    """Parse a file with the specified reader and return the resulting Measurement

    Args:
        path_to_file (Path or str): The path to the file to read
        reader (str or Reader class): The (name of the) reader to read the file
            with. A string is looked up in READER_CLASSES and instantiated.
        kwargs: key-word arguments are passed on to the reader's read() method.
    """
    if not isinstance(reader, str):
        # A reader object was given directly, so use it as is.
        return reader.read(path_to_file, cls=cls, **kwargs)

    # TODO: see if there isn't a way to put the import at the top of the module.
    #    see: https://github.com/ixdat/ixdat/pull/1#discussion_r546437471
    from .readers import READER_CLASSES

    reader_object = READER_CLASSES[reader]()
    return reader_object.read(path_to_file, cls=cls, **kwargs)
@classmethod
def read_url(cls, url, reader, **kwargs):
    """Read a url (via a temporary file) using the specified reader

    The temporary file is removed again whether or not reading succeeds.

    Args:
        url (str): The url to read from; passed to url_to_file()
        reader (str or Reader class): The (name of the) reader to use
        kwargs: key-word arguments are passed on via cls.read()
    """
    from .readers.reading_tools import url_to_file

    path_to_temp_file = url_to_file(url)
    try:
        return cls.read(path_to_temp_file, reader=reader, **kwargs)
    finally:
        # clean up even if the reader raised, so no temporary files pile up
        path_to_temp_file.unlink()
@classmethod
def read_set(
    cls, path_to_file_start, reader, suffix=None, file_list=None, **kwargs
):
    """Read a set of files and combine them into one measurement.

    Args:
        path_to_file_start (Path or str): The path to the files to read including
            the shared start of the file name: `Path(path_to_file).parent` is
            interpreted as the folder where the files are.
            `Path(path_to_file).name` is interpreted as the shared start of the
            names of the files to be appended. Only used if file_list is empty.
        reader (str or Reader class): The (name of the) reader to read the files with
        suffix (str): If a suffix is given, only files with the specified ending are
            kept in the file list
        file_list (list of Path): As an alternative to path_to_file_start, the
            exact files to append can be specified in a list
        kwargs: Key-word arguments are passed via cls.read() to the reader's read()
            method, AND to cls.from_component_measurements()
    """
    base_name = None
    if not file_list:
        start = Path(path_to_file_start)
        base_name = start.name
        file_list = []
        for candidate in start.parent.iterdir():
            if candidate.name.startswith(base_name):
                file_list.append(candidate)
    if suffix:
        file_list = [path for path in file_list if path.suffix == suffix]

    component_measurements = []
    for path in file_list:
        component_measurements.append(cls.read(path, reader=reader, **kwargs))

    # The shared start of the file names serves as the default name
    if base_name and "name" not in kwargs:
        kwargs["name"] = base_name
    return cls.from_component_measurements(component_measurements, **kwargs)
@classmethod
def from_component_measurements(
    cls, component_measurements, keep_originals=True, sort=True, **kwargs
):
    """Return a measurement with the data contained in the component measurements

    TODO: This function "builds" the resulting measurement, i.e. it appends series
        of the same name rather than keeping all the original copies. This should be
        made more explicit, and a `build()` method should take over some of the work.

    The dictionary representation of the first component measurement, updated
    with kwargs, is the starting point for the returned measurement. All time
    data are expressed relative to the tstamp of the first component measurement.

    Args:
        component_measurements (list of Measurement): The measurements to
            combine. Must contain at least one measurement.
        keep_originals: Whether to keep a list of component_measurements referenced.
            This may result in redundant numpy arrays in RAM.
        sort (bool): Whether to sort the series according to time
        kwargs: key-word arguments are added to the dictionary for cls.from_dict()

    Returns cls: The object returned by cls.from_dict() for the combined series.
    """
    # First prepare everything but the series_list in the object dictionary
    obj_as_dict = component_measurements[0].as_dict()
    obj_as_dict.update(kwargs)
    # the id's of the first component do not describe the combined measurement:
    del obj_as_dict["m_ids"], obj_as_dict["s_ids"]
    if keep_originals:
        obj_as_dict["component_measurements"] = component_measurements

    # Now, prepare the built series. First, we loop through the component
    # measurements and get all the data and metadata organized in a dictionary:
    series_as_dicts = {}
    tstamp = component_measurements[0].tstamp
    for meas in component_measurements:
        tstamp_i = meas.tstamp  # save this for later.
        meas.tstamp = tstamp  # so that the time vectors share a t=0
        for s_name in meas.series_names:
            series = meas[s_name]
            if s_name in series_as_dicts:
                # a series of this name was already seen: just append the data
                series_as_dicts[s_name]["data"] = np.append(
                    series_as_dicts[s_name]["data"], series.data
                )
            else:
                series_as_dicts[s_name] = series.as_dict()
                series_as_dicts[s_name]["data"] = series.data
                if isinstance(series, ValueSeries):
                    # This will serve to match it to a TimeSeries later:
                    series_as_dicts[s_name]["t_name"] = series.tseries.name
        meas.tstamp = tstamp_i  # so it's not changed in the outer scope

    # Now we make DataSeries, starting with all the TimeSeries
    # (recognized by having a "tstamp" entry in their dictionary representation)
    tseries_dict = {}
    sort_indeces = {}
    for name, s_as_dict in series_as_dicts.items():
        if "tstamp" in s_as_dict:
            if sort:
                # remember the sorting so that it can be applied to the values too
                sort_indeces[name] = np.argsort(s_as_dict["data"])
                s_as_dict["data"] = s_as_dict["data"][sort_indeces[name]]
            tseries_dict[name] = TimeSeries.from_dict(s_as_dict)

    # And then ValueSeries, and put both in with the TimeSeries
    series_list = []
    for name, s_as_dict in series_as_dicts.items():
        if name in tseries_dict:
            series_list.append(tseries_dict[name])
        elif "t_name" in s_as_dict:
            tseries = tseries_dict[s_as_dict["t_name"]]
            if s_as_dict["data"].shape == tseries.shape:
                # Then we assume that the time and value data have lined up
                # successfully! :D
                if sort:
                    s_as_dict["data"] = s_as_dict["data"][
                        sort_indeces[tseries.name]
                    ]
                vseries = ValueSeries(
                    name=name,
                    data=s_as_dict["data"],
                    unit_name=s_as_dict["unit_name"],
                    tseries=tseries,
                )
            else:
                # this will be the case if vseries sharing the same tseries
                # are not present in the same subset of component_measurements.
                # In that case just append the vseries even though some tdata gets
                # duplicated.
                vseries = append_series(
                    [
                        s
                        for m in component_measurements
                        for s in m.series_list
                        if s.name == name
                    ],
                    sort=sort,
                )
            series_list.append(vseries)

    # Finally, add this series to the dictionary representation and return the object
    obj_as_dict["series_list"] = series_list
    return cls.from_dict(obj_as_dict)
@property
def metadata_json_string(self):
    """The measurement's metadata dictionary as JSON text with 4-space indents"""
    metadata = self.metadata
    return json.dumps(metadata, indent=4)
@property
def sample_name(self):
    """Name of the measured sample, or None if the measurement has no sample"""
    sample = self.sample
    return sample.name if sample else None
@property
def series_list(self):
    """The measurement's DataSeries, with any placeholders replaced by the objects

    PlaceHolderObjects in the list are swapped in place for the result of their
    get_object() the first time the list is requested.
    """
    stored = self._series_list
    for position, entry in enumerate(stored):
        if not isinstance(entry, PlaceHolderObject):
            continue
        stored[position] = entry.get_object()
    return stored
@property
def data_objects(self):
    """This is what the DB backend knows to save separately, here the series

    Returns a new list with all the TimeSeries first, followed by the remaining
    series, each preceded by its `tseries` if that is not yet in the list.
    """
    # TimeSeries have to go first, so that ValueSeries are saved with the right t_id!
    data_object_list = self.time_series  # a fresh list, so appending is safe
    for s in self.series_list:
        if s not in data_object_list:
            if s.tseries not in data_object_list:
                # FIXME: some tseries, likely with duplicate data, seem to not
                #  make it into series_list
                data_object_list.append(s.tseries)
            data_object_list.append(s)
    return data_object_list
@property
def component_measurements(self):
    """The measurements of which this measurement is a combination, as a list

    A pure measurement (one without stored components) returns a list containing
    only itself. PlaceHolderObjects among the components are swapped in place
    for the result of their get_object().
    """
    components = self._component_measurements
    if not components:
        return [self]
    for position, entry in enumerate(components):
        if not isinstance(entry, PlaceHolderObject):
            continue
        components[position] = entry.get_object()
    return components
@property
def s_ids(self):
    """The `id` of each entry in the stored series list, in order

    Placeholders are not loaded; only their `id` attribute is read.
    """
    ids = []
    for entry in self._series_list:
        ids.append(entry.id)
    return ids
@property
def m_ids(self):
    """The `id` of each stored component measurement, or None if there are none"""
    components = self._component_measurements
    if components:
        return [component.id for component in components]
    return None
@property
def series_dict(self):
    """Dictionary mapping `(id, backend_name)` of each series to the DataSeries"""
    mapping = {}
    for series in self.series_list:
        key = (series.id, series.backend_name)
        mapping[key] = series
    return mapping
@property
def series_names(self):
    """Set of the names of the series in the measurement"""
    return {entry.name for entry in self.series_list}
@property
def value_names(self):
    """Set of the names of the ValueSeries in the measurement"""
    return {entry.name for entry in self.value_series}
@property
def value_series(self):
    """List of those entries of series_list which are ValueSeries"""
    selected = []
    for entry in self.series_list:
        if isinstance(entry, ValueSeries):
            selected.append(entry)
    return selected
@property
def time_names(self):
    """Set of the names of the TimeSeries in the measurement"""
    return {entry.name for entry in self.time_series}
@property
def time_series(self):
    """List of those entries of series_list which are TimeSeries. NOT timeshifted!"""
    selected = []
    for entry in self.series_list:
        if isinstance(entry, TimeSeries):
            selected.append(entry)
    return selected
def __getitem__(self, item):
    """Return the built measurement DataSeries with its name specified by item

    The item is first interpreted as the exact name of a series. If no series
    has that name and item ends with "-t", "-x", "-v" or "-y", the suffix is
    stripped and the series with the remaining name are looked up instead.
    NOTE(review): this implementation returns the series with the stripped name
    for all four suffixes; it does not itself swap in the corresponding
    TimeSeries for "-t"/"-x" - confirm whether subclasses are meant to do that.

    If there is more than one series with the requested name, they are appended.
    If the resulting series has a `tstamp` differing from the measurement's, it
    is time-shifted to the measurement's tstamp.

    Args:
        item (str): The name of a DataSeries (see above)

    Raises:
        SeriesNotFoundError: if no series matches item, with or without suffix
    """
    matches = [s for s in self.series_list if s.name == item]
    if not matches and item[-2:] in ["-t", "-x", "-v", "-y"]:
        matches = [s for s in self.series_list if s.name == item[:-2]]
    if not matches:
        # also covers a suffixed name whose stem does not exist, which would
        # otherwise fail with an IndexError inside append_series([])
        raise SeriesNotFoundError(f"{self} has no series called {item}")

    s = matches[0] if len(matches) == 1 else append_series(matches)
    if hasattr(s, "tstamp") and not s.tstamp == self.tstamp:
        s = time_shifted(s, self.tstamp)
    return s
def __setitem__(self, series_name, series):
    """Make `series` the only series called `series_name` in series_list

    Any existing series of that name are removed before `series` is appended.

    Raises:
        SeriesNotFoundError: if `series.name` does not equal `series_name`
    """
    names_agree = series.name == series_name
    if not names_agree:
        raise SeriesNotFoundError(
            f"Can't set {self}[{series_name}] = {series}. Series names don't agree."
        )
    del self[series_name]
    self.series_list.append(series)
def __delitem__(self, series_name):
    """Drop every series named `series_name` by replacing the stored series list"""
    self._series_list = [
        entry for entry in self.series_list if not entry.name == series_name
    ]
def correct_data(self, value_name, new_data):
    """Replace the old data for `value_name` (str) with `new_data` (np array)

    A new ValueSeries with the same name, unit name and tseries as the existing
    one is created around `new_data` and stored in place of the old series.
    """
    previous = self[value_name]
    self[value_name] = ValueSeries(
        name=value_name,
        unit_name=previous.unit_name,
        data=new_data,
        tseries=previous.tseries,
    )
def grab(self, item, tspan=None, include_endpoints=False, tspan_bg=None):
    """Return a value vector with the corresponding time vector

    Grab is the *canonical* way to retrieve numerical time-dependent data from a
    measurement in ixdat. The first argument is always the name of the value to get
    time-resolved data for (the name of a ValueSeries). The second, optional,
    argument is a timespan to select the data for.
    Two vectors are returned: first time (t), then value (v). They are of the same
    length so that `v` can be plotted against `t`, integrated over `t`, interpolated
    via `t`, etc. `t` and `v` are returned in the units of their DataSeries.
    TODO: option to specify desired units

    Typical usage::
        t, v = measurement.grab("potential", tspan=[0, 100])

    Args:
        item (str): The name of the DataSeries to grab data for
        tspan (iter of float): Defines the timespan with its first and last values.
            Optional. By default the entire time of the measurement is included.
        include_endpoints (bool): Whether to add points at t = tspan[0] and
            t = tspan[-1] to the data returned. This makes trapezoidal integration
            less dependent on the time resolution. Default is False. The added
            values are linearly interpolated, which assumes increasing time data.
        tspan_bg (iterable): Optional. A timespan defining when `item` is at its
            baseline level. The average value of `item` in this interval will be
            subtracted from the values returned. None or an empty timespan
            means no background subtraction.

    Returns:
        tuple: (t, v), the time vector relative to self.tstamp and the values
    """
    vseries = self[item]
    tseries = vseries.tseries
    v = vseries.data
    t = tseries.data + tseries.tstamp - self.tstamp

    if tspan is not None:  # np arrays don't boolean well :(
        t_first, t_last = tspan[0], tspan[-1]
        if include_endpoints and len(t) > 0:
            add_start = t[0] < t_first  # data starts before the timespan
            add_end = t_last < t[-1]  # data continues after the timespan
            # Interpolate both end values from the raw data *before* adding any
            # point: np.interp requires increasing x-values, which the time
            # vector no longer has once tspan[0] is put in front of it.
            v_start = np.interp(t_first, t, v) if add_start else None
            v_end = np.interp(t_last, t, v) if add_end else None
            if add_start:
                t = np.append(t_first, t)
                v = np.append(v_start, v)
            if add_end:
                t = np.append(t, t_last)
                v = np.append(v, v_end)
        mask = np.logical_and(t_first <= t, t <= t_last)
        t, v = t[mask], v[mask]

    # explicit check, since a numpy array as tspan_bg has no truth value
    if tspan_bg is not None and len(tspan_bg) > 0:
        _, v_bg = self.grab(item, tspan=tspan_bg)
        v = v - np.mean(v_bg)
    return t, v
def grab_for_t(self, item, t, tspan_bg=None):
    """Return a numpy array with the value of item interpolated to time t

    Args:
        item (str): The name of the value to grab
        t (np array): The time vector to grab the value for, relative to
            self.tstamp
        tspan_bg (iterable): Optional. A timespan defining when `item` is at its
            baseline level. The average value of `item` in this interval will be
            subtracted from what is returned. None or an empty timespan means
            no background subtraction.
    """
    vseries = self[item]
    tseries = vseries.tseries
    v_0 = vseries.data
    t_0 = tseries.data + tseries.tstamp - self.tstamp
    v = np.interp(t, t_0, v_0)
    # explicit check, since a numpy array as tspan_bg has no truth value
    if tspan_bg is not None and len(tspan_bg) > 0:
        _, v_bg = self.grab(item, tspan=tspan_bg)
        v = v - np.mean(v_bg)
    return v
def integrate(self, item, tspan=None, ax=None):
    """Return the time integral of item in the specified timespan

    The data are grabbed with end points included and integrated with the
    trapezoidal rule.

    Args:
        item (str): The name of the value to integrate
        tspan (iter of float): The timespan to integrate over. Optional.
        ax (matplotlib Axis or str): Optional. An axis on which to plot the data
            and shade the integrated area. If "new", a new axis is requested
            from the measurement's plotter.
    """
    t, v = self.grab(item, tspan, include_endpoints=True)
    if ax:
        if ax == "new":
            ax = self.plotter.new_ax(ylabel=item)
            # FIXME: xlabel=self[item].tseries.name gives a problem :(
        ax.plot(t, v, color="k", label=item)
        baseline = np.zeros(t.shape)
        ax.fill_between(t, v, baseline, where=v > 0, color="g", alpha=0.3)
        ax.fill_between(
            t, v, baseline, where=v < 0, color="g", alpha=0.1, hatch="//"
        )
    # np.trapz is deprecated in favor of np.trapezoid as of NumPy 2.0; use the
    # new name when the installed numpy has it, and the old one otherwise.
    trapezoid = getattr(np, "trapezoid", None) or np.trapz
    return trapezoid(v, t)
@property
def data_cols(self):
    """Set of the names of all of the measurement's ValueSeries and TimeSeries"""
    names = set()
    for entry in self.value_series + self.time_series:
        names.add(entry.name)
    return names
@property
def plotter(self):
    """The measurement's plotter; a ValuePlotter is created if none is set yet."""
    if self._plotter:
        return self._plotter
    # FIXME: I had to import here to avoid running into circular import issues
    from .plotters import ValuePlotter

    self._plotter = ValuePlotter(measurement=self)
    return self._plotter
@property
def exporter(self):
    """The measurement's exporter; a CSVExporter is created if none is set yet."""
    if self._exporter:
        return self._exporter
    self._exporter = CSVExporter(measurement=self)
    return self._exporter
def export(self, *args, exporter=None, **kwargs):
    """Export the measurement (see the Exporter used for details)

    Args:
        args: Passed on to the exporting method
        exporter (Exporter): Optional. If given, its export_measurement() is
            called with this measurement. Otherwise the export() method of the
            measurement's own exporter is used.
        kwargs: Passed on to the exporting method
    """
    if not exporter:
        return self.exporter.export(*args, **kwargs)
    return exporter.export_measurement(self, *args, **kwargs)
def get_original_m_id_of_series(self, series):
    """Return the id(s) of component measurements to which `series` belongs.

    Returns:
        The id itself if exactly one component measurement lists `series.id`
        among its s_ids, otherwise the (possibly empty) list of matching ids.
    """
    matching_ids = [
        component.id
        for component in self.component_measurements
        if series.id in component.s_ids
    ]
    return matching_ids[0] if len(matching_ids) == 1 else matching_ids
def cut(self, tspan, t_zero=None):
    """Return a new measurement with the data in the given time interval

    Series without a `tseries` are carried over unchanged. Series with no data
    in the interval are left out, and series lying entirely in the interval are
    reused as they are. All other series are replaced by new series holding
    only the data in the interval.

    Args:
        tspan (iter of float): The time interval to use, relative to self.tstamp
            tspan[0] is the start time of the interval, and tspan[-1] is the end
            time of the interval. Using tspan[-1] means you can directly use a
            long time vector that you have at hand to describe the time interval
            you're looking for.
        t_zero (float or str): Where to put the tstamp of the returned measurement.
            Default is to keep it the same as the present tstamp. If instead it is
            a float, this adds the float to the present tstamp. If t_zero is "start",
            tspan[0] is added to the present tstamp.

    Returns:
        The object returned by `self.__class__.from_dict()` for the cut series.
    """
    new_series_list = []
    obj_as_dict = self.as_dict()
    # cache, so that series sharing a tseries also share mask and new tseries:
    time_cutting_stuff = {}  # {tseries_id: (mask, new_tseries)}
    for series in self.series_list:
        try:
            tseries = series.tseries
            if tseries is None:
                raise AttributeError
        except AttributeError:  # series independent of time are unaffected by cut
            new_series_list.append(series)
        else:
            t_id = (tseries.id, tseries.backend_name)
            # FIXME: Beautiful, met my first id clash here. Local memory and loaded
            #    each had a timeseries with id=1, but different length. Previously
            #    the above line of code was just t_id = tseries.id as you'd expect,
            #    meaning that time_cutting_stuff appeared to already have the needed
            #    tseries but didn't!
            #    Note that the id together with the backend works but should be
            #    replaced by a single Universal Unique Identifier, or perhaps just
            #    a property `Saveable.uid`, returning `(self.id, self.backend_name)`
            if t_id in time_cutting_stuff:
                mask, new_tseries = time_cutting_stuff[t_id]
            else:
                # time relative to the measurement's tstamp, as tspan is
                t = tseries.t + tseries.tstamp - self.tstamp
                mask = np.logical_and(tspan[0] <= t, t <= tspan[-1])
                new_tseries = TimeSeries(
                    name=tseries.name,
                    unit_name=tseries.unit_name,
                    tstamp=tseries.tstamp,
                    data=tseries.data[mask],
                )
                time_cutting_stuff[t_id] = (mask, new_tseries)
            if True not in mask:
                # no data in the interval, so the series is left out
                continue
            if False not in mask:
                # all data in the interval, so the series is used as it is
                new_series_list.append(series)
            elif (series.id, series.backend_name) == t_id:
                # the series is the time series itself
                new_series_list.append(new_tseries)
            else:
                new_series = series.__class__(
                    name=series.name,
                    unit_name=series.unit_name,
                    data=series.data[mask],
                    tseries=new_tseries,
                )
                new_series_list.append(new_series)
    obj_as_dict["series_list"] = new_series_list
    del obj_as_dict["s_ids"]  # the series are given directly in series_list
    if t_zero:
        if t_zero == "start":
            t_zero = tspan[0]
        obj_as_dict["tstamp"] += t_zero
    new_measurement = self.__class__.from_dict(obj_as_dict)
    return new_measurement
def select_value(self, *args, **kwargs):
    """Return a new Measurement with the time(s) meeting criteria.

    Can only take one arg or kwarg!
    The `series_name` is `self.sel_str` if given an arg, kw if given a kwarg.
    Either way the argument is the `value` to be selected for.

    The method finds all time intervals for which `self[series_name] == value`
    It then cuts the measurement according to each time interval and adds these
    segments together. TODO: This can be done better, i.e. without chopping series.

    TODO: greater-than and less-than kwargs.
        Ideally you should be able to say e.g., `select(cycle=1, 0.5<potential<1)`

    Returns:
        The sum of the cut segments, or None if the value never occurs.

    Raises:
        BuildError: if a positional value is given but the measurement has no
            default selector (`sel_str`), or if not exactly one value is given.
    """
    if args:
        if not self.sel_str:
            raise BuildError(
                f"{self} does not have a default selection string "
                f"(Measurement.sel_str), and so selection only works with kwargs."
            )
        if len(args) > 1:
            raise BuildError(
                f"select_value got args={args} but can only be used for one value "
                f"at a time. Use select_values for more."
            )
        # the value itself, not the tuple of positional arguments
        kwargs[self.sel_str] = args[0]
    if len(kwargs) != 1:
        raise BuildError(
            f"select_value got kwargs={kwargs} but needs exactly one value "
            f"at a time. Use select_values for more."
        )

    ((series_name, value),) = kwargs.items()
    t, v = self.grab(series_name)
    mask = v == value  # linter doesn't realize this is a np array
    mask_prev = np.append(False, mask[:-1])
    mask_next = np.append(mask[1:], False)
    interval_starts_here = np.logical_and(
        np.logical_not(mask_prev), mask
    )  # True at [0] if mask[0] is True.
    interval_ends_here = np.logical_and(
        mask, np.logical_not(mask_next)
    )  # True at [-1] if mask[-1] is True.
    t_starts = list(t[interval_starts_here])
    t_ends = list(t[interval_ends_here])

    meas = None
    for tspan in zip(t_starts, t_ends):
        segment = self.cut(tspan)
        # compare to None rather than relying on the truth value of an object
        meas = segment if meas is None else meas + segment
    return meas
def select_values(self, *args, **kwargs):
    """Return a new Measurement with the time(s) in the measurement meeting criteria

    Any series can be selected for using the series name as a key-word. Arguments
    can be single acceptable values or lists of acceptable values. In the latter
    case, each acceptable value is selected for on its own and the resulting
    measurements added together. A string counts as a single value.
    FIXME: That is sloppy because it multiplies the number of DataSeries
    FIXME:  containing the same amount of data.
    If no key-word is given, the series name is assumed to
    be the default selector, which is named by self.sel_str. Multiple criteria are
    applied sequentially, i.e. you get the intersection of satisfying parts.

    Args:
        args (tuple): Argument(s) given without key-word are understood as acceptable
            value(s) for the default selector (that named by self.sel_str)
        kwargs (dict): Each key-word arguments is understood as the name
            of a series and its acceptable value(s).

    Returns:
        The selected measurement, or None if one of the criteria matches nothing.

    Raises:
        BuildError: if a positional value is given but the measurement has no
            default selector (`sel_str`).
    """
    if args:
        if not self.sel_str:
            raise BuildError(
                f"{self} does not have a default selection string "
                f"(Measurement.sel_str), and so selection only works with kwargs."
            )
        kwargs[self.sel_str] = args[0] if len(args) == 1 else args

    new_measurement = self
    for series_name, allowed_values in kwargs.items():
        # strings are iterable, but a string is one value, not a list of characters
        if isinstance(allowed_values, str) or not hasattr(
            allowed_values, "__iter__"
        ):
            allowed_values = [allowed_values]
        meas = None
        for value in allowed_values:
            m = new_measurement.select_value(**{series_name: value})
            if m is None:  # the value does not occur, so there is nothing to add
                continue
            meas = m if meas is None else meas + m
        if meas is None:
            # nothing satisfies this criterion, so the intersection is empty
            return None
        new_measurement = meas
    return new_measurement
def select(self, *args, tspan=None, **kwargs):
    """`cut` (with tspan) and then `select_values` (with args and/or kwargs).

    Without tspan, args or kwargs, the measurement itself is returned.
    """
    selection = self.cut(tspan=tspan) if tspan else self
    if not (args or kwargs):
        return selection
    return selection.select_values(*args, **kwargs)
@property
def tspan(self):
    """Return `(t_start, t_finish)` interval including all data in the measurement

    The first and last data point of each time series (as returned by
    `self[name]`) are compared. Empty time series are skipped. If there are no
    time data at all, `(None, None)` is returned.
    """
    t_start = None
    t_finish = None
    for tcol in self.time_names:
        t = self[tcol].data
        if len(t) == 0:
            continue
        # compare to None explicitly: a start or finish time of 0 is a valid value
        t_start = t[0] if t_start is None else min(t_start, t[0])
        t_finish = t[-1] if t_finish is None else max(t_finish, t[-1])
    return t_start, t_finish
def __add__(self, other):
    """Addition of measurements appends the series and component measurements lists.

    Adding results in a new Measurement. If the combination of the two measurements'
    techniques is a recognized hyphenated technique, it returns an object of that
    technique's measurement class. Otherwise, if both measurements are of the same
    class, it returns an object of that class. Otherwise it returns an object of
    Measurement. Entries present in both measurements' dictionary representations,
    e.g. metadata and sample, come from the first measurement.

    An important point about addition is that it is almost but not quite associative
    and commutative i.e.
    A + (B + C) == (A + B) + C == C + B + A   is not quite true
    Each one results in the same series and component measurements. They will even
    appear in the same order in A + (B + C) and (A + B) + C. However, the technique
    might be different, as a new technique might be determined each time.

    Note also that there is no difference between hyphenating (simultaneous EC and
    MS datasets, for example) and appending (sequential EC datasets). Either way,
    all the raw series (or their placeholders) are just stored in the lists.
    TODO: Make sure with tests this is okay, differentiate using | operator if not.

    Args:
        other (Measurement): The measurement to add to this one
    """
    # First we prepare a dictionary for all but the series_list.
    # This has both dicts, but prioritizes self's dict for all that appears twice.
    obj_as_dict = self.as_dict()
    other_as_dict = other.as_dict()
    for k, v in other_as_dict.items():
        # Looking forward to the "|" operator!
        if k not in obj_as_dict:
            obj_as_dict[k] = v
    new_name = self.name + " AND " + other.name
    new_technique = get_combined_technique(self.technique, other.technique)

    # TODO: see if there isn't a way to put the import at the top of the module.
    #    see: https://github.com/ixdat/ixdat/pull/1#discussion_r546437410
    from .techniques import TECHNIQUE_CLASSES

    # choose the class of the result: the combined technique's class if known,
    # else the shared class of the two measurements, else plain Measurement
    if new_technique in TECHNIQUE_CLASSES:
        cls = TECHNIQUE_CLASSES[new_technique]
    elif self.__class__ is other.__class__:
        cls = self.__class__
    else:
        cls = Measurement

    # list addition makes new lists, so the lists of self and other are untouched
    new_series_list = self.series_list + other.series_list
    new_component_measurements = (
        self.component_measurements + other.component_measurements
    )
    obj_as_dict.update(
        name=new_name,
        technique=new_technique,
        series_list=new_series_list,
        component_measurements=new_component_measurements,
    )
    return cls.from_dict(obj_as_dict)
def join(self, other, join_on=None):
    """Join two measurements based on a shared data series

    This involves projecting all timeseries from other's data series so that the
    variable named by `join_on` is shared between all data series.
    This is analogous to an explicit inner join.

    NOTE(review): the visible source has no code after this docstring, so
    calling this method does nothing and returns None - confirm whether an
    implementation is missing or still to be written.

    Args:
        other (Measurement): a second measurement to join to self
        join_on (str or tuple): Either a string, if the value to join on is called
            the same thing in both measurements, or a tuple of two strings if it is
            not.
            The variable described by join_on must be monotonically increasing in
            both measurements.
    """
# ------- Now come a few module-level functions for series manipulation ---------
# TODO: move to an `ixdat.build` module or similar.
# There's a lot of stuff that should go there. Basically anything in ECMeasurement
# that can be reasonably converted to a module level function to decrease the
# awkwardness there.
def append_series(series_list, sort=True, tstamp=None):
    """Return series appending series_list relative to series_list[0].tseries.tstamp

    The type of the first series decides how the appending is done: TimeSeries
    are handled by append_tseries() and ValueSeries by append_vseries_by_time().

    Args:
        series_list (list of Series): The series to append (must all be of same type)
        sort (bool): Whether to sort the data so that time only goes forward
        tstamp (unix tstamp): The t=0 of the returned series or its TimeSeries.

    Raises:
        BuildError: if the first series is neither a TimeSeries nor a ValueSeries
    """
    s0 = series_list[0]
    if isinstance(s0, TimeSeries):
        appender = append_tseries
    elif isinstance(s0, ValueSeries):
        appender = append_vseries_by_time
    else:
        raise BuildError(
            f"An algorithm of append_series for series like {s0} is not yet implemented"
        )
    return appender(series_list, sort=sort, tstamp=tstamp)
def append_vseries_by_time(series_list, sort=True, tstamp=None):
    """Return new ValueSeries with the data in series_list appended

    The name, unit and class of the result are those of the first series. The
    TimeSeries of the result is built from the series' tseries by append_tseries().

    Args:
        series_list (list of ValueSeries): The value series to append
        sort (bool): Whether to sort the data so that time only goes forward
        tstamp (unix tstamp): The t=0 of the returned ValueSeries' TimeSeries.

    Raises:
        BuildError: if a series differs from the first one in unit or class
    """
    first = series_list[0]
    name, cls, unit = first.name, first.__class__, first.unit

    # The combined time series comes with the indices which sort it
    tseries, sort_indeces = append_tseries(
        [series.tseries for series in series_list],
        sort=sort,
        return_sort_indeces=True,
        tstamp=tstamp,
    )

    data = np.array([])
    for series in series_list:
        compatible = series.unit == unit and series.__class__ == cls
        if not compatible:
            raise BuildError(f"can't append {series_list}")
        data = np.append(data, series.data)
    if sort:
        # the values have to follow their times
        data = data[sort_indeces]

    return cls(name=name, unit_name=unit.name, data=data, tseries=tseries)
def append_tseries(series_list, sort=True, return_sort_indeces=False, tstamp=None):
    """Return new TimeSeries with the data appended.

    The name, unit and class of the result are those of the first series. All
    data are shifted to be relative to the tstamp of the result.

    Args:
        series_list (list of TimeSeries): The time series to append
        sort (bool): Whether to sort the data so that time only goes forward
        return_sort_indeces (bool): Whether to return the indeces that sort the data
        tstamp (unix tstamp): The t=0 of the returned TimeSeries. Defaults to the
            tstamp of the first series.

    Returns:
        The new TimeSeries or, if return_sort_indeces is True, a tuple of the new
        TimeSeries and the sorting indices (None if sort is False).

    Raises:
        BuildError: if a series differs from the first one in unit or class
    """
    name = series_list[0].name
    cls = series_list[0].__class__
    unit = series_list[0].unit
    if tstamp is None:  # an explicitly given tstamp of 0 has to be respected
        tstamp = series_list[0].tstamp

    data = np.array([])
    for s in series_list:
        if not (s.unit == unit and s.__class__ == cls):
            raise BuildError(f"can't append {series_list}")
        # shift each series to the common t=0 before appending
        data = np.append(data, s.data + s.tstamp - tstamp)

    if sort:
        sort_indices = np.argsort(data)
        data = data[sort_indices]
    else:
        sort_indices = None

    tseries = cls(name=name, unit_name=unit.name, data=data, tstamp=tstamp)
    if return_sort_indeces:
        return tseries, sort_indices
    return tseries
def fill_object_list(object_list, obj_ids, cls=None):
    """Add PlaceHolderObjects to object_list for any unrepresented obj_ids.

    Args:
        object_list (list of objects or None): The objects already known,
            in a list. This is the list to be appended to. If None, an empty
            list will be appended to.
        obj_ids (list of ints or None): The id's of objects to ensure are in
            the list. Any id in obj_ids not already represented in object_list
            is added to the list as a PlaceHolderObject
        cls (Saveable class): the class remembered by any PlaceHolderObjects
            added to the object_list, so that eventually the right object will
            be loaded. Defaults to the class of the first object in object_list.

    Returns:
        list: object_list itself (or a new list if it was None or empty), with
            the placeholders appended.

    Raises:
        BuildError: if a placeholder is needed but cls is neither given nor
            derivable from object_list.
    """
    object_list = object_list or []
    if not obj_ids:
        return object_list

    provided_ids = {obj.id for obj in object_list}
    for i in obj_ids:
        if i in provided_ids:
            continue
        if cls is None:
            # Only look for a class when it is needed, and only where there is
            # something to look at.
            if not object_list:
                raise BuildError(
                    f"can't make a placeholder for id={i} without knowing its class"
                )
            cls = object_list[0].__class__
        object_list.append(PlaceHolderObject(i=i, cls=cls))
    return object_list
def time_shifted(series, tstamp=None):
    """Return a series with the time shifted to be relative to tstamp

    The series itself is returned if there is nothing to do, i.e. if no tstamp
    is given, if the series is falsy, or if it already has the requested tstamp.
    A TimeSeries gets its data shifted; a ValueSeries gets a shifted tseries.
    Series of any other type are returned as they are.
    """
    if tstamp is None or not series or tstamp == series.tstamp:
        return series

    cls = series.__class__
    if isinstance(series, TimeSeries):
        shifted_data = series.data + series.tstamp - tstamp
        return cls(
            name=series.name,
            unit_name=series.unit.name,
            data=shifted_data,
            tstamp=tstamp,
        )
    if isinstance(series, ValueSeries):
        # the values stay the same; only their time axis moves
        shifted_tseries = time_shifted(series.tseries, tstamp=tstamp)
        return cls(
            name=series.name,
            unit_name=series.unit.name,
            data=series.data,
            tseries=shifted_tseries,
        )
    return series
def get_combined_technique(technique_1, technique_2):
    """Return the name of the technique resulting from adding two techniques

    Args:
        technique_1 (str or None): The technique of the first measurement
        technique_2 (str or None): The technique of the second measurement

    Returns:
        str or None: The shared technique if they are equal; the other technique
            if one of them is not specified; the hyphenated technique if one is a
            component of the other or if their hyphenation is a key of
            TECHNIQUE_CLASSES; otherwise the two joined with " AND ".
    """
    if technique_1 == technique_2:
        return technique_1

    # A measurement without a specified technique (the default is None) adds
    # nothing to the technique of the other one.
    if not technique_1:
        return technique_2
    if not technique_2:
        return technique_1

    # if we're adding a component technique of a hyphenated technique to that
    # hyphenated technique, the result is still the hyphenated technique.
    # e.g. EC-MS + MS = EC-MS
    if "-" in technique_1 and technique_2 in technique_1.split("-"):
        return technique_1
    elif "-" in technique_2 and technique_1 in technique_2.split("-"):
        return technique_2

    # if we're adding two independent technique which are components of a hyphenated
    # technique, then we want that hyphenated technique. e.g. EC + MS = EC-MS
    # TODO: see if there isn't a way to put the import at the top of the module.
    #    see: https://github.com/ixdat/ixdat/pull/1#discussion_r546437410
    from .techniques import TECHNIQUE_CLASSES

    for hyphenated in [
        technique_1 + "-" + technique_2,
        technique_2 + "-" + technique_1,
    ]:
        if hyphenated in TECHNIQUE_CLASSES:
            return hyphenated

    # if all else fails, we just join them with " AND ". e.g. MS + XRD = MS AND XRD
    return technique_1 + " AND " + technique_2