# Source code for ixdat.readers.zilien

import re
from pathlib import Path

import numpy as np
import pandas as pd

from .ec_ms_pkl import measurement_from_ec_ms_dataset
from .reading_tools import timestamp_string_to_tstamp, FLOAT_MATCH
from ..data_series import DataSeries, TimeSeries, ValueSeries, Field
from ..techniques import ECMSMeasurement, MSMeasurement, ECMeasurement, Measurement
from ..techniques.ms import MSSpectrum

# Format passed as `form` to timestamp_string_to_tstamp for the 19-character
# timestamp that starts the Zilien file and folder names read in this module.
ZILIEN_TIMESTAMP_FORM = "%Y-%m-%d %H_%M_%S"  # like 2021-03-15 18_50_10


class ZilienTSVReader:
    """Class for reading files saved by Spectro Inlets' Zilien software"""

    def read(self, path_to_file, cls=None, name=None, **kwargs):
        """Read a zilien file

        TODO: This is a hack using EC_MS to read the .tsv. Will be replaced.

        Args:
            path_to_file (Path or str): The path to the Zilien .tsv file
            cls (Measurement class): The class of the object to return. Both None
                and the generic Measurement class are replaced by ECMSMeasurement.
            name (str): Passed on to measurement_from_ec_ms_dataset
            kwargs: Key-word arguments are passed on to
                measurement_from_ec_ms_dataset. If "technique" is not among them,
                it is set to "EC-MS", "EC" or "MS" according to `cls`.

        Returns:
            The object returned by measurement_from_ec_ms_dataset
        """
        # Function-level import: EC_MS is only imported when a file is actually read.
        from EC_MS import Zilien_Dataset

        # `cls` defaults to None, and issubclass() below raises TypeError for
        # anything that is not a class, so None has to be resolved here as well.
        if cls is None or cls is Measurement:
            cls = ECMSMeasurement

        if "technique" not in kwargs:
            # The combined EC-MS class is checked before the EC and MS classes.
            if issubclass(cls, ECMSMeasurement):
                kwargs["technique"] = "EC-MS"
            elif issubclass(cls, ECMeasurement):
                kwargs["technique"] = "EC"
            elif issubclass(cls, MSMeasurement):
                kwargs["technique"] = "MS"

        ec_ms_dataset = Zilien_Dataset(path_to_file)
        return measurement_from_ec_ms_dataset(
            ec_ms_dataset.data,
            cls=cls,
            name=name,
            reader=self,
            **kwargs,
        )
class ZilienTMPReader:
    """A class for stitching the files in a Zilien tmp directory to an ECMSMeasurement

    This is necessary because Zilien often crashes, leaving only the tmp directory.
    This is less advanced but more readable than the Spectro Inlets stitching solution.
    """

    def __init__(self, path_to_tmp_dir=None):
        """Initiate the reader, optionally with the tmp directory to read later

        Args:
            path_to_tmp_dir (Path or str): the path to the tmp dir
        """
        self.path_to_tmp_dir = Path(path_to_tmp_dir) if path_to_tmp_dir else None

    def read(self, path_to_tmp_dir=None, cls=None, **kwargs):
        """Make a measurement from all the single-value .tsv files in a Zilien tmp dir

        Args:
            path_to_tmp_dir (Path or str): the path to the tmp dir. If omitted, the
                path given when the reader was initiated is used.
            cls (Measurement class): Defaults to ECMSMeasurement
            kwargs: Key-word arguments update (and can override) the dictionary
                that is passed to cls.from_dict

        Returns:
            The object returned by cls.from_dict

        Raises:
            ValueError: if no tmp directory was given here or at initiation
        """
        if path_to_tmp_dir:
            self.path_to_tmp_dir = Path(path_to_tmp_dir)
        if self.path_to_tmp_dir is None:
            raise ValueError(
                "ZilienTMPReader needs a path_to_tmp_dir, either when it is "
                "initiated or when read() is called."
            )
        cls = cls or ECMSMeasurement

        # The measurement is named after the folder containing the tmp directory.
        name = self.path_to_tmp_dir.parent.name
        timestamp_string = name[:19]  # the zilien timestamp is the first 19 chars
        tstamp = timestamp_string_to_tstamp(
            timestamp_string, form=ZILIEN_TIMESTAMP_FORM
        )

        series_list = []
        # iterdir() yields entries in arbitrary order; sorting makes the order of
        # the series reproducible from run to run.
        for tmp_file in sorted(self.path_to_tmp_dir.iterdir()):
            series_list += series_list_from_tmp(tmp_file)

        obj_as_dict = {
            "name": name,
            "tstamp": tstamp,
            "series_list": series_list,
            "technique": "EC-MS",
            "reader": self,
        }
        obj_as_dict.update(kwargs)
        return cls.from_dict(obj_as_dict)
def series_list_from_tmp(path_to_file):
    """Return [TimeSeries, ValueSeries] with the data in a zilien tmp .tsv file

    The name of the value column is the part of the file name between a "." and
    ".data". If that part contains a mass like "M32", the mass alone is used as
    the name and the unit is "A"; otherwise the unit is None. The time column is
    named after the value column with "-x" appended, and has the unit "s".

    Args:
        path_to_file (Path or str): the path to the tmp .tsv file

    Returns:
        list: [tseries, vseries], or an empty list if no column name could be
            found in the file name
    """
    file_name = Path(path_to_file).name

    # Look for the column name before parsing the timestamp, so that a file which
    # is not a Zilien data file is skipped with a message instead of failing in
    # the timestamp parsing.
    column_match = re.search(r"\.([^\.]+)\.data", file_name)
    if not column_match:
        print(f"could not find column name in {path_to_file}")
        return []

    timestamp_string = file_name[:19]  # the zilien timestamp form is 19 chars long
    tstamp = timestamp_string_to_tstamp(timestamp_string, form=ZILIEN_TIMESTAMP_FORM)

    v_name = column_match.group(1)
    mass_match = re.search(r"M[0-9]+", v_name)
    if mass_match:
        v_name = mass_match.group()
        unit = "A"
    else:
        unit = None
    t_name = v_name + "-x"

    # header=0 together with names=[...] replaces the first row of the file with
    # the column names chosen above.
    df = pd.read_csv(path_to_file, delimiter="\t", names=[t_name, v_name], header=0)
    t_data, v_data = df[t_name].to_numpy(), df[v_name].to_numpy()

    tseries = TimeSeries(name=t_name, unit_name="s", data=t_data, tstamp=tstamp)
    vseries = ValueSeries(name=v_name, unit_name=unit, data=v_data, tseries=tseries)
    return [tseries, vseries]
class ZilienSpectrumReader:
    """A reader for individual Zilien spectra

    TODO: A Zilien reader which loads all spectra at once in a SpectrumSeries object
    """

    def __init__(self, path_to_spectrum=None):
        """Initiate the reader, optionally with the spectrum file to read later

        Args:
            path_to_spectrum (Path or str): the path to the spectrum file
        """
        self.path_to_spectrum = Path(path_to_spectrum) if path_to_spectrum else None

    def read(self, path_to_spectrum=None, cls=None, **kwargs):
        """Make a spectrum from a single Zilien spectrum file

        The "Mass [AMU]" and "Current [A]" columns are read from the table whose
        header is on the tenth line of the file. The number on the line containing
        "Mass scan started at [s]", looked for in the first ten lines, is used as
        the tstamp.

        FIXME: This reader was written hastily and could be designed better.

        Args:
            path_to_spectrum (Path or str): the path to the spectrum file. If
                omitted, the path given when the reader was initiated is used.
            cls (Spectrum class): Defaults to MSSpectrum
            kwargs: Key-word arguments are passed on ultimately to cls.__init__

        Returns:
            The object returned by cls.from_dict

        Raises:
            ValueError: if no path was given here or at initiation, or if the start
                time of the mass scan is not found in the first ten lines
        """
        if path_to_spectrum:
            self.path_to_spectrum = Path(path_to_spectrum)
        if self.path_to_spectrum is None:
            raise ValueError(
                "ZilienSpectrumReader needs a path_to_spectrum, either when it is "
                "initiated or when read() is called."
            )
        cls = cls or MSSpectrum

        # Always use the stored Path from here on, so that a str argument works too.
        df = pd.read_csv(
            self.path_to_spectrum,
            header=9,
            delimiter="\t",
        )
        x_name = "Mass [AMU]"
        y_name = "Current [A]"
        x = df[x_name].to_numpy()
        y = df[y_name].to_numpy()

        tstamp = None
        with open(self.path_to_spectrum, "r") as f:
            for _ in range(10):
                line = f.readline()
                if "Mass scan started at [s]" in line:
                    tstamp_match = re.search(FLOAT_MATCH, line)
                    if tstamp_match:
                        tstamp = float(tstamp_match.group())
        if tstamp is None:
            raise ValueError(
                "could not find the start time of the mass scan in the first "
                f"10 lines of {self.path_to_spectrum}"
            )

        xseries = DataSeries(data=x, name=x_name, unit_name="m/z")
        tseries = TimeSeries(
            data=np.array([0]), name="spectrum time / [s]", unit_name="s", tstamp=tstamp
        )
        field = Field(
            data=np.array([y]),
            name=y_name,
            unit_name="A",
            axes_series=[xseries, tseries],
        )
        obj_as_dict = {
            "name": self.path_to_spectrum.name,
            "technique": "MS",
            "field": field,
            "reader": self,
        }
        obj_as_dict.update(kwargs)
        return cls.from_dict(obj_as_dict)
if __name__ == "__main__":
    # Module demo: read a Zilien .tsv file with the "zilien" reader and plot it.
    #
    # To run this module in PyCharm, open Run Configuration and set
    #     Module name = ixdat.readers.zilien,
    # and *not*
    #     Script path = ...
    from pathlib import Path

    from ixdat.measurements import Measurement

    test_data_dir = Path.home() / "Dropbox/ixdat_resources/test_data/"
    # alternative demo file: "zilien_with_spectra/2021-02-01 14_50_40.tsv"
    demo_file = test_data_dir / "zilien_with_ec/2021-02-01 17_44_12.tsv"

    demo_measurement = Measurement.read(path_to_file=demo_file, reader="zilien")
    demo_measurement.plot_measurement()