"""The glotaran project module."""
from __future__ import annotations
import re
from dataclasses import dataclass
from dataclasses import field
from importlib.metadata import distribution
from pathlib import Path
from textwrap import dedent
from typing import Any
from typing import Literal
import xarray as xr
from glotaran.builtin.io.yml.utils import load_dict
from glotaran.model import Model
from glotaran.parameter import ParameterGroup
from glotaran.project.project_data_registry import ProjectDataRegistry
from glotaran.project.project_model_registry import ProjectModelRegistry
from glotaran.project.project_parameter_registry import ProjectParameterRegistry
from glotaran.project.project_result_registry import ProjectResultRegistry
from glotaran.project.result import Result
from glotaran.project.scheme import Scheme
from glotaran.utils.io import make_path_absolute_if_relative
from glotaran.utils.ipython import MarkdownStr
# Content written to a freshly created project file; ``Project.create`` fills in
# ``gta_version`` with the installed pyglotaran version.
TEMPLATE = "version: {gta_version}"
# Name of the project file that ``Project.create`` / ``Project.open`` look for
# inside a project folder.
PROJECT_FILE_NAME = "project.gta"
@dataclass
class Project:
    """A project represents a project folder on disk which contains a project file.

    A project file is a file in ``yml`` format with the name ``project.gta``.
    """

    version: str = field(init=False)
    file: Path
    folder: Path | None = field(default=None)

    def __post_init__(self) -> None:
        """Normalize the paths and set up the version and the item registries.

        ``file`` and ``folder`` are converted to ``Path`` instances; ``folder`` falls back
        to the parent folder of ``file`` when it is ``None``. ``version`` is set to the
        version of the installed ``pyglotaran`` distribution and one registry per item
        kind (data, models, parameters, results) is created for the project folder.
        """
        self.file = Path(self.file)
        if self.folder is None:
            self.folder = self.file.parent
        self.folder = Path(self.folder)
        self.version = distribution("pyglotaran").version

        self._data_registry = ProjectDataRegistry(self.folder)
        self._model_registry = ProjectModelRegistry(self.folder)
        self._parameter_registry = ProjectParameterRegistry(self.folder)
        self._result_registry = ProjectResultRegistry(self.folder)

    @staticmethod
    def create(folder: str | Path, allow_overwrite: bool = False) -> Project:
        """Create a new project folder and file.

        Parameters
        ----------
        folder : str | Path
            The folder where the project will be created. The folder and missing parent
            folders are created if they do not exist yet.
        allow_overwrite: bool
            Whether to overwrite an existing project file.

        Returns
        -------
        Project
            The created project.

        Raises
        ------
        FileExistsError
            Raised if the project file already exists and `allow_overwrite=False`.
        """
        project_folder = make_path_absolute_if_relative(Path(folder))
        project_folder.mkdir(parents=True, exist_ok=True)
        project_file = project_folder / PROJECT_FILE_NAME
        if project_file.exists() and not allow_overwrite:
            raise FileExistsError(
                f"Project file '{project_file}' already exist. "
                "Set `allow_overwrite=True` to overwrite."
            )
        project_file.write_text(TEMPLATE.format(gta_version=distribution("pyglotaran").version))
        return Project.open(project_file, create_if_not_exist=True)

    @classmethod
    def open(cls, project_folder_or_file: str | Path, create_if_not_exist: bool = True) -> Project:
        """Open a project, optionally creating it first.

        Parameters
        ----------
        project_folder_or_file : str | Path
            The path to a project folder or file. A path whose last component is
            ``project.gta`` is treated as the project file, anything else as the folder.
        create_if_not_exist : bool
            Create the project if not existent.

        Returns
        -------
        Project
            The project instance.

        Raises
        ------
        FileNotFoundError
            Raised when the project file does not exist and `create_if_not_exist` is `False`.
        """
        folder = make_path_absolute_if_relative(Path(project_folder_or_file))
        if folder.name == PROJECT_FILE_NAME:
            folder, file = folder.parent, folder
        else:
            file = folder / PROJECT_FILE_NAME

        if file.is_file() is False:
            if create_if_not_exist is False:
                raise FileNotFoundError(f"Project file {file.as_posix()} does not exists.")
            Project.create(folder)

        project_dict = load_dict(file, True)
        project_dict["file"] = file
        project_dict["folder"] = folder
        # ``version`` is not an ``__init__`` argument (``init=False``), so it is taken out
        # of the loaded mapping and assigned afterwards. This replaces the installed
        # version that ``__post_init__`` stored with the one recorded in the file.
        version = project_dict.pop("version")
        project = cls(**project_dict)
        project.version = version
        return project

    @property
    def has_data(self) -> bool:
        """Check if the project has datasets.

        Returns
        -------
        bool
            Whether the project has datasets.
        """
        return not self._data_registry.empty

    @property
    def data(self) -> dict[str, Path]:
        """Get all project datasets.

        Returns
        -------
        dict[str, Path]
            The datasets of the project.
        """
        return self._data_registry.items

    def load_data(self, dataset_name: str) -> xr.Dataset | xr.DataArray:
        """Load a dataset.

        Parameters
        ----------
        dataset_name : str
            The name of the dataset.

        Returns
        -------
        xr.Dataset | xr.DataArray
            The loaded dataset.

        Raises
        ------
        ValueError
            Raised if the dataset does not exist.
        """
        try:
            return self._data_registry.load_item(dataset_name)
        except ValueError as e:
            raise ValueError(f"Dataset {dataset_name!r} does not exist.") from e

    def import_data(
        self,
        path: str | Path,
        name: str | None = None,
        allow_overwrite: bool = False,
        ignore_existing: bool = False,
    ) -> None:
        """Import a dataset.

        Parameters
        ----------
        path : str | Path
            The path to the dataset.
        name : str | None
            The name of the dataset.
        allow_overwrite: bool
            Whether to overwrite an existing dataset.
        ignore_existing: bool
            Whether to ignore import if the dataset already exists.
        """
        self._data_registry.import_data(
            path, name=name, allow_overwrite=allow_overwrite, ignore_existing=ignore_existing
        )

    @property
    def has_models(self) -> bool:
        """Check if the project has models.

        Returns
        -------
        bool
            Whether the project has models.
        """
        return not self._model_registry.empty

    @property
    def models(self) -> dict[str, Path]:
        """Get all project models.

        Returns
        -------
        dict[str, Path]
            The models of the project.
        """
        return self._model_registry.items

    def load_model(self, name: str) -> Model:
        """Load a model.

        Parameters
        ----------
        name : str
            The name of the model.

        Returns
        -------
        Model
            The loaded model.

        Raises
        ------
        ValueError
            Raised if the model does not exist.
        """
        try:
            return self._model_registry.load_item(name)
        except ValueError as e:
            raise ValueError(f"Model {name!r} does not exist.") from e

    def generate_model(
        self,
        model_name: str,
        generator_name: str,
        generator_arguments: dict[str, Any],
        *,
        allow_overwrite: bool = False,
        ignore_existing: bool = False,
    ) -> None:
        """Generate a model.

        Parameters
        ----------
        model_name : str
            The name of the model.
        generator_name : str
            The generator for the model.
        generator_arguments : dict[str, Any]
            Arguments for the generator.
        allow_overwrite: bool
            Whether to overwrite an existing model.
        ignore_existing: bool
            Whether to ignore generation of a model file if it already exists.
        """
        self._model_registry.generate_model(
            model_name,
            generator_name,
            generator_arguments,
            allow_overwrite=allow_overwrite,
            ignore_existing=ignore_existing,
        )

    def get_models_directory(self) -> Path:
        """Get the path to the model directory of the project.

        Returns
        -------
        Path
            The path to the project's model directory.
        """
        return self._model_registry.directory

    @property
    def has_parameters(self) -> bool:
        """Check if the project has parameters.

        Returns
        -------
        bool
            Whether the project has parameters.
        """
        return not self._parameter_registry.empty

    @property
    def parameters(self) -> dict[str, Path]:
        """Get all project parameters.

        Returns
        -------
        dict[str, Path]
            The parameters of the project.
        """
        return self._parameter_registry.items

    def load_parameters(self, parameters_name: str) -> ParameterGroup:
        """Load parameters.

        Parameters
        ----------
        parameters_name : str
            The name of the parameters.

        Returns
        -------
        ParameterGroup
            The loaded parameters.

        Raises
        ------
        ValueError
            Raised if parameters do not exist.
        """
        try:
            return self._parameter_registry.load_item(parameters_name)
        except ValueError as e:
            raise ValueError(f"Parameters '{parameters_name}' does not exist.") from e

    def generate_parameters(
        self,
        model_name: str,
        parameters_name: str | None = None,
        *,
        format_name: Literal["yml", "yaml", "csv"] = "csv",
        allow_overwrite: bool = False,
        ignore_existing: bool = False,
    ) -> None:
        """Generate parameters for a model.

        Parameters
        ----------
        model_name : str
            The model.
        parameters_name : str | None
            The name of the parameters. If ``None`` it will be <model_name>_parameters.
        format_name : Literal["yml", "yaml", "csv"]
            The parameter format.
        allow_overwrite: bool
            Whether to overwrite existing parameters.
        ignore_existing: bool
            Whether to ignore generation of a parameter file if it already exists.
        """
        model = self.load_model(model_name)
        parameters_name = (
            parameters_name if parameters_name is not None else f"{model_name}_parameters"
        )
        self._parameter_registry.generate_parameters(
            model,
            parameters_name,
            format_name=format_name,
            allow_overwrite=allow_overwrite,
            ignore_existing=ignore_existing,
        )

    def get_parameters_directory(self) -> Path:
        """Get the path to the parameter directory of the project.

        Returns
        -------
        Path
            The path to the project's parameter directory.
        """
        return self._parameter_registry.directory

    @property
    def has_results(self) -> bool:
        """Check if the project has results.

        Returns
        -------
        bool
            Whether the project has results.
        """
        return not self._result_registry.empty

    @property
    def results(self) -> dict[str, Path]:
        """Get all project results.

        Returns
        -------
        dict[str, Path]
            The results of the project.
        """
        return self._result_registry.items

    def get_result_path(self, result_name: str, *, latest: bool = False) -> Path:
        """Get the path to a result with name ``result_name``.

        Parameters
        ----------
        result_name : str
            The name of the result.
        latest: bool
            Flag to deactivate warning about using latest result. Defaults to False

        Returns
        -------
        Path
            The path to the result.

        Raises
        ------
        ValueError
            Raised if result does not exist.
        """
        # The registry resolves the given name to the concrete result name to look up.
        result_name = self._result_registry._latest_result_name_fallback(
            result_name, latest=latest
        )
        path = self._result_registry.directory / result_name
        if self._result_registry.is_item(path):
            return path
        raise ValueError(f"Result {result_name!r} does not exist.")

    def get_latest_result_path(self, result_name: str) -> Path:
        """Get the path to the latest result with name ``result_name``.

        Anything in ``result_name`` that matches the registry's ``result_pattern`` is
        removed before the lookup, which is then done with ``latest=True``.

        Parameters
        ----------
        result_name : str
            The name of the result.

        Returns
        -------
        Path
            The path to the result.

        Raises
        ------
        ValueError
            Raised if result does not exist.


        .. # noqa: DAR402
        """
        result_name = re.sub(self._result_registry.result_pattern, "", result_name)
        return self.get_result_path(result_name, latest=True)

    def load_result(self, result_name: str, *, latest: bool = False) -> Result:
        """Load a result.

        Parameters
        ----------
        result_name : str
            The name of the result.
        latest: bool
            Flag to deactivate warning about using latest result. Defaults to False

        Returns
        -------
        Result
            The loaded result.

        Raises
        ------
        ValueError
            Raised if result does not exist.
        """
        # The registry resolves the given name to the concrete result name to load.
        result_name = self._result_registry._latest_result_name_fallback(
            result_name, latest=latest
        )
        try:
            return self._result_registry.load_item(result_name)
        except ValueError as e:
            raise ValueError(f"Result {result_name!r} does not exist.") from e

    def load_latest_result(self, result_name: str) -> Result:
        """Load the latest result with name ``result_name``.

        Anything in ``result_name`` that matches the registry's ``result_pattern`` is
        removed before loading, which is then done with ``latest=True``.

        Parameters
        ----------
        result_name : str
            The name of the result.

        Returns
        -------
        Result
            The loaded result.

        Raises
        ------
        ValueError
            Raised if result does not exist.


        .. # noqa: DAR402
        """
        result_name = re.sub(self._result_registry.result_pattern, "", result_name)
        return self.load_result(result_name, latest=True)

    def create_scheme(
        self,
        model_name: str,
        parameters_name: str,
        maximum_number_function_evaluations: int | None = None,
        clp_link_tolerance: float = 0.0,
    ) -> Scheme:
        """Create a scheme for optimization.

        The model is loaded by name and every dataset listed in the model's ``dataset``
        attribute is loaded from the project under the same name.

        Parameters
        ----------
        model_name : str
            The model to optimize.
        parameters_name : str
            The initial parameters.
        maximum_number_function_evaluations : int | None
            The maximum number of function evaluations.
        clp_link_tolerance : float
            The CLP link tolerance.

        Returns
        -------
        Scheme
            The created scheme.
        """
        loaded_model = self.load_model(model_name)
        data = {
            dataset: self.load_data(dataset)
            for dataset in loaded_model.dataset  # type:ignore[attr-defined]
        }
        return Scheme(
            model=loaded_model,
            parameters=self.load_parameters(parameters_name),
            data=data,
            maximum_number_function_evaluations=maximum_number_function_evaluations,
            clp_link_tolerance=clp_link_tolerance,
        )

    def optimize(
        self,
        model_name: str,
        parameters_name: str,
        result_name: str | None = None,
        maximum_number_function_evaluations: int | None = None,
        clp_link_tolerance: float = 0.0,
    ) -> None:
        """Optimize a model and save the result in the project.

        Parameters
        ----------
        model_name : str
            The model to optimize.
        parameters_name : str
            The initial parameters.
        result_name : str | None
            The name of the result. Falls back to ``model_name`` if not given.
        maximum_number_function_evaluations : int | None
            The maximum number of function evaluations.
        clp_link_tolerance : float
            The CLP link tolerance.
        """
        # Imported inside the method rather than at module level
        # (NOTE(review): presumably to avoid a circular import -- confirm).
        from glotaran.optimization.optimize import optimize

        scheme = self.create_scheme(
            model_name, parameters_name, maximum_number_function_evaluations, clp_link_tolerance
        )
        result = optimize(scheme)

        result_name = result_name or model_name
        self._result_registry.save(result_name, result)

    def markdown(self) -> MarkdownStr:
        """Format the project as a markdown text.

        Returns
        -------
        MarkdownStr : str
            The markdown string.
        """
        folder_as_posix = self.folder.as_posix()  # type:ignore[union-attr]
        # The template lines carry a 12 space margin which ``dedent`` removes again; the
        # registries are asked for the same ``join_indentation`` so that multi-line
        # listings share that margin.
        # NOTE(review): the margin was restored from the ``join_indentation=12`` arguments
        # and the ``dedent`` call; blank lines between sections may have been lost as
        # well -- confirm against the upstream file.
        md = f"""\
            # Project _{folder_as_posix}_
            pyglotaran version: {self.version}
            ## Data
            {self._data_registry.markdown(join_indentation=12)}
            ## Model
            {self._model_registry.markdown(join_indentation=12)}
            ## Parameters
            {self._parameter_registry.markdown(join_indentation=12)}
            ## Results
            {self._result_registry.markdown(join_indentation=12)}
            """
        return MarkdownStr(dedent(md))

    def _repr_markdown_(self) -> str:
        """Create a markdown representation.

        Special method used by ``ipython`` to render markdown.

        Returns
        -------
        str :
            The markdown representation as string.
        """
        return str(self.markdown())