Source code for glotaran.builtin.file_formats.ascii.wavelength_time_explicit_file

from __future__ import annotations

import os.path
import re
import warnings
from enum import Enum

import numpy as np
import pandas as pd
import xarray as xr

from glotaran.io.prepare_dataset import prepare_time_trace_dataset
from glotaran.io.reader import file_reader


[docs]class DataFileType(Enum):
    time_explicit = "Time explicit"
    wavelength_explicit = "Wavelength explicit"


[docs]class ExplicitFile:
    """
    Abstract class representing either a time- or wavelength-explicit file.
    """

    # TODO: implement time_intervals
    def __init__(self, filepath: str = None, dataset: xr.DataArray = None):
        self._file_data_format = None
        self._observations = []  # TODO: choose name: data_points, observations, data
        self._times = []
        self._spectral_indices = []
        self._label = ""
        self._comment = ""
        absfilepath = os.path.realpath(filepath)
        if dataset is not None:
            self._observations = np.array(dataset.values).T
            self._times = np.array(dataset.coords["time"])
            self._spectral_indices = np.array(dataset.coords["spectral"])
            self._file = filepath
        elif os.path.isfile(filepath):
            self._file = filepath
        elif os.path.isfile(absfilepath):
            self._file = absfilepath
        else:
            raise Exception(f"Path does not exist: {filepath}, {absfilepath}")

[docs]    def get_explicit_axis(self):
        raise NotImplementedError

[docs]    def set_explicit_axis(self, axis):
        raise NotImplementedError

[docs]    def get_secondary_axis(self):
        raise NotImplementedError

[docs]    def get_data_row(self, index):
        raise NotImplementedError

[docs]    def get_observations(self, index):
        raise NotImplementedError

[docs]    def get_format_name(self):
        raise NotImplementedError

[docs]    def write(
        self,
        overwrite=False,
        comment="",
        file_format=DataFileType.time_explicit,
        number_format="%.10e",
    ):
        # TODO: write a more elegant method

        if os.path.isfile(self._file) and not overwrite:
            print("File {} already exists".format(os.path.isfile(self._file)))
            raise Exception("File already exist.")
        comment = self._comment + " " + comment

        comments = "# Filename: " + str(self._file) + "\n" + " ".join(comment.splitlines()) + "\n"

        if file_format == DataFileType.wavelength_explicit:
            wav = "\t".join(repr(num) for num in self._spectral_indices)
            header = (
                comments + "Wavelength explicit\nIntervalnr {}"
                "".format(len(self._spectral_indices)) + "\n" + wav
            )
            raw_data = np.vstack((self._times.T, self._observations)).T
        elif file_format == DataFileType.time_explicit:
            tim = "\t".join(repr(num) for num in self._times)
            header = (
                comments + "Time explicit\nIntervalnr {}" "".format(len(self._times)) + "\n" + tim
            )
            raw_data = np.vstack((self._spectral_indices.T, self._observations.T)).T
        else:
            raise NotImplementedError

        np.savetxt(
            self._file,
            raw_data,
            fmt=number_format,
            delimiter="\t",
            newline="\n",
            header=header,
            footer="",
            comments="",
        )

[docs]    def read(self, prepare: bool = True):
        if not os.path.isfile(self._file):
            raise Exception("File does not exist.")
        with open(self._file) as f:
            f.readline()  # The first two lines are comments
            f.readline()
            # The third line defines the ExplicitFileFormat (Time or Wavelength explicit)
            self._file_data_format = get_data_file_format(f.readline())
            # The fourth line define the number of elements on the explicit axis, which
            # we can ignore because pandas is intelligent enough to read it
        # read the first line (explicit_axis) separately
        explicit_axis = pd.read_csv(
            self._file, skiprows=4, delimiter=r"\s+", header=None, nrows=1
        ).values
        explicit_axis = explicit_axis[0, :]  # reshape to (n,)
        # then the rest of the data:
        rest_of_data = pd.read_csv(self._file, skiprows=5, delimiter=r"\s+", header=None).values
        secondary_axis = rest_of_data[:, 0]
        observations = rest_of_data[:, 1:]
        if self._file_data_format == DataFileType.time_explicit:
            self._times = explicit_axis  # (501,)
            self._spectral_indices = secondary_axis  # (51,)
            self._observations = observations  # len(observation)=51 . (51, 501)
        elif self._file_data_format == DataFileType.wavelength_explicit:
            self._spectral_indices = explicit_axis
            self._times = secondary_axis
            self._observations = observations
        else:
            raise NotImplementedError()
        return self.dataset(prepare=prepare)

[docs]    def dataset(self, prepare: bool = True) -> xr.Dataset | xr.DataArray:
        data = self._observations
        if self._file_data_format == DataFileType.time_explicit:
            data = data.T
        dataset = xr.DataArray(
            data, coords=[("time", self._times), ("spectral", self._spectral_indices)]
        )
        if prepare:
            dataset = prepare_time_trace_dataset(dataset)
        return dataset


[docs]class WavelengthExplicitFile(ExplicitFile):
    """
    Represents a wavelength explicit file
    """

[docs]    def get_explicit_axis(self):
        return self._spectral_indices

[docs]    def get_secondary_axis(self):
        return self.observations()

[docs]    def get_data_row(self, index):
        return []

[docs]    def add_data_row(self, row):
        if self._timepoints is None:
            self._timepoints = []
        self._timepoints.append(float(row.pop(0)))

        if self._spectra is None:
            self._spectra = []
        self._spectra.append(float(row))

[docs]    def get_format_name(self):
        return DataFileType.wavelength_explicit

[docs]    def times(self):
        return self.get_secondary_axis()

[docs]    def wavelengths(self):
        return self.get_explicit_axis()


[docs]class TimeExplicitFile(ExplicitFile):
    """
    Represents a time explicit file
    """

[docs]    def get_explicit_axis(self):
        return self.observations()

[docs]    def set_explicit_axis(self, axies):
        self._timepoints = float(axies)

[docs]    def get_secondary_axis(self):
        return self.channel_labels

[docs]    def get_data_row(self, index):
        return self.get_channel(self.channel_labels()[index])

[docs]    def add_data_row(self, row):
        if self._spectral_indices is None:
            self._spectral_indices = []
        self._spectral_indices.append(row.pop(0))

        if self._spectra is None:
            self._spectra = []
        self._spectra.append(float(row))

[docs]    def get_format_name(self):
        return DataFileType.time_explicit


[docs]def get_interval_number(line):
    interval_number = None
    match = re.search(r"intervalnr\s(.*)", line.strip().lower())
    if match:
        interval_number = match.group(1)
    if not interval_number:
        interval_number = re.search(r"\d+", line[::-1]).group()[::-1]
    try:
        interval_number = int(interval_number)
    except ValueError:
        warnings.warn(f"No interval number found in line:\n{line}")
        interval_number = None
    return interval_number


[docs]def get_data_file_format(line):
    data_file_format = None
    if re.search(r"time\s+explicit|time\t+explicit", line.strip().lower()):
        # print("Time explicit format") #TODO: verbosity / debug statement
        data_file_format = DataFileType.time_explicit
    elif re.search(r"wavelength\s+explicit|wavelength\t+explicit", line.strip().lower()):
        # print("Wavelength explicit format") #TODO: verbosity / debug statement
        data_file_format = DataFileType.wavelength_explicit
    else:
        raise NotImplementedError()
    return data_file_format


[docs]@file_reader(extension="ascii", name="Wavelength-/Time-Explicit ASCII")
def read_ascii_time_trace(fname: str, prepare: bool = True) -> xr.Dataset:
    """Reads an ascii file in wavelength- or time-explicit format.

    See [1]_ for documentation of this format.

    Parameters
    ----------
    fname : str
        Name of the ascii file.

    Returns
    -------
    dataset : xr.Dataset

    Notes
    -----
    .. [1] https://glotaran.github.io/legacy/file_formats
    """

    data_file_format = None
    with open(fname) as f:
        f.readline()  # Read first line with comments (and discard for now)
        f.readline()  # Read second line with comments (and discard for now)
        data_file_format = get_data_file_format(f.readline())

    data_file = (
        WavelengthExplicitFile(filepath=fname)
        if data_file_format is DataFileType.wavelength_explicit
        else TimeExplicitFile(fname)
    )

    return data_file.read(prepare=prepare)


[docs]def write_ascii_time_trace(
    filename: str,
    dataset: xr.DataArray,
    overwrite=False,
    comment="",
    file_format="TimeExplicit",
    number_format="%.10e",
):
    data_file = (
        TimeExplicitFile(filepath=filename, dataset=dataset)
        if file_format == "TimeExplicit"
        else WavelengthExplicitFile(filepath=filename, dataset=dataset)
    )
    data_file.write(
        overwrite=overwrite, comment=comment, file_format=file_format, number_format=number_format
    )