Source code for glotaran.parse.util

import re
from typing import List
from typing import Tuple
from typing import Union

# Disabled, more permissive variant matching any "(...,...)" group:
# tuple_pattern = re.compile(r"(\(.*?,.*?\))")

# A parenthesised group made up only of whitespace, digits, '.', '+', '-'
# and commas, e.g. "(3, 100)".
tuple_number_pattern = re.compile(r"(\([\s\d.+-]+?[,\s\d.+-]*?\))")
# A run of digit, '.', '+' and '-' characters, e.g. "-1.5".
number_pattern = re.compile(r"[\d.+-]+")
# A parenthesised group made up only of word characters, whitespace, '.'
# and commas, e.g. "(s1, s2)".
tuple_name_pattern = re.compile(r"(\([.\s\w\d]+?[,.\s\w\d]*?\))")
# A run of word characters (letters, digits, underscore).
name_pattern = re.compile(r"[\w]+")
# The shortest non-empty span enclosed in round brackets.
# Not referenced by the functions visible in this module.
group_pattern = re.compile(r"(\(.+?\))")
# Square brackets enclosing a round-bracket group that has at least one
# character before and after it.
# Not referenced by the functions visible in this module.
match_list_with_tuples = re.compile(r"(\[.+\(.+\).+\])")
# Either a (shortest) round-bracket group or a run of '-', '+', '.' and digit
# characters; used to split the textual form of a list into its elements.
match_elements_in_string_of_list = re.compile(r"(\(.+?\)|[-+.\d]+)")


def sanitize_list_with_broken_tuples(mangled_list: List[Union[str, float]]) -> List[str]:
    """Glue the fragments of 'broken' tuples in a list back together.

    The yaml parser splits tuples inside a list at their commas, so the list
    of tuples ``[(3,100), (4,200)]`` arrives as the list of strings
    ``['(3', '100)', '(4', '200)']``. This function restores the tuples in
    string form: ``['(3, 100)', '(4, 200)']``.

    Parameters
    ----------
    mangled_list : List[Union[str,float]]
        A list with strings representing tuples broken up by round brackets.

    Returns
    -------
    List[str]
        Every round-bracket group and every run of sign, dot and digit
        characters found in the textual form of the list, as strings.
        The result can be converted to numbered tuples using
        `list_string_to_tuple`.
    """
    # The textual form of a list already separates neighbouring elements with
    # ", "; dropping the quote characters therefore turns
    # "['(3', '100)']" into "[(3, 100)]".
    unquoted = str(mangled_list).replace("'", "")
    # The pattern consists of a single capturing group, so group(1) is the
    # complete match.
    return [found.group(1) for found in match_elements_in_string_of_list.finditer(unquoted)]
def sanitize_dict_keys(d: dict) -> dict:
    """Sanitize the stringified tuple dict keys in a yaml parsed dict.

    Keys representing a tuple, e.g. '(s1, s2)', are converted to a tuple of
    strings, e.g. ('s1', 's2').

    Nested dicts and lists are searched recursively. Whenever a nested dict
    contains tuple-like keys it is replaced, inside its parent container, by
    the converted dict. The keys of ``d`` itself are never changed in place;
    the converted version of ``d`` is only available as the return value.

    Parameters
    ----------
    d : dict
        A dict (or list) containing tuple-like string keys, possibly nested.

    Returns
    -------
    dict
        A copy of ``d`` with tuple-like string keys converted to tuple keys
        and all other entries kept in their original order, or an empty dict
        if ``d`` is not a dict or has no tuple-like keys of its own.
    """
    if not isinstance(d, (dict, list)):
        return {}

    is_mapping = isinstance(d, dict)
    entries = d.items() if is_mapping else enumerate(d)

    rebuilt = {}
    found_tuple_key = False
    for key, value in entries:
        if is_mapping and isinstance(key, str) and tuple_name_pattern.match(key):
            rebuilt[tuple(name_pattern.findall(key))] = value
            found_tuple_key = True
            continue

        # Not a tuple-like key: look for tuple-like keys further down and
        # swap in the converted container if there were any.
        converted_child = sanitize_dict_keys(value)
        if converted_child:
            # Assigning to an existing key/index does not resize the
            # container, so this is safe while iterating over it.
            d[key] = converted_child
            value = converted_child
        if is_mapping:
            # Keep ordinary entries as well; otherwise they would be lost
            # when the caller replaces this dict with the returned one.
            rebuilt[key] = value

    # An empty result signals "nothing to replace" to the caller.
    return rebuilt if found_tuple_key else {}
def sanitize_dict_values(d: dict):
    """Restore broken tuples in the values of a (nested) dict, in place.

    Broken tuples are tuples that are turned into strings by the yaml parser.
    String values are converted with `string_to_tuple`. Lists consisting only
    of strings, tuples and floats that contain a round bracket are glued
    together with `sanitize_list_with_broken_tuples` and then converted with
    `list_string_to_tuple`. All other lists and all dicts are processed
    recursively.

    Parameters
    ----------
    d : dict
        A (complex) dict or list containing (possibly nested) values of
        broken tuple strings. Anything else is ignored.
    """
    if isinstance(d, dict):
        entries = d.items()
    elif isinstance(d, list):
        entries = enumerate(d)
    else:
        return

    for key, value in entries:
        if isinstance(value, str):
            d[key] = string_to_tuple(value)
        elif isinstance(value, dict):
            sanitize_dict_values(value)
        elif isinstance(value, list):
            # NOTE(review): ints are not accepted here, so a list mixing ints
            # with broken tuple strings is treated as a nested container and
            # its fragments are not glued together - confirm this is intended.
            is_leaf = all(isinstance(item, (str, tuple, float)) for item in value)
            if not is_leaf:
                sanitize_dict_values(value)
            elif "(" in str(value):
                d[key] = list_string_to_tuple(sanitize_list_with_broken_tuples(value))
def string_to_tuple(
    tuple_str: str, from_list: bool = False
) -> Union[Tuple[float, ...], Tuple[str, ...], float, str]:
    """Convert a string representing a tuple (or a number) to its value.

    A string such as ``"(1, 2)"`` becomes ``(1.0, 2.0)`` and a string such as
    ``"(s1, s2)"`` becomes ``("s1", "s2")``. Any other string is returned
    unchanged, unless `from_list` is set and it can be read as a number.

    Parameters
    ----------
    tuple_str : str
        A string representing some tuple to convert.
        The numbers inside the string tuple are mapped to float.
    from_list : bool, optional
        Only if true will a single number string be converted to float,
        otherwise it is returned as-is since it may represent a label,
        by default False

    Returns
    -------
    Union[Tuple[float, ...], Tuple[str, ...], float, str]
        The tuple (or number) intended by the string, or the string itself
        if it represents neither.

    Raises
    ------
    ValueError
        If the string looks like a tuple of numbers but one of its elements
        is not a valid float (e.g. ``"(1-2, 3)"``).
    """
    if tuple_number_pattern.match(tuple_str):
        return tuple(map(float, number_pattern.findall(tuple_str)))
    if tuple_name_pattern.match(tuple_str):
        return tuple(name_pattern.findall(tuple_str))
    if from_list and number_pattern.match(tuple_str):
        # The pattern only checks that the string *starts* with sign, dot or
        # digit characters, so strings such as "2nd", "-" or "1.2.3" get here
        # as well. Those are labels, not numbers: return them unchanged
        # instead of letting float() raise.
        try:
            return float(tuple_str)
        except ValueError:
            return tuple_str
    return tuple_str
def list_string_to_tuple(a_list: List[str]) -> List[Union[float, str]]:
    """Convert a list of strings (representing tuples) to a list of tuples.

    Every element is passed through `string_to_tuple` with ``from_list=True``,
    so strings holding a single number are converted to float as well.
    The list is modified in place.

    Parameters
    ----------
    a_list : List[str]
        A list of strings, some of them representing (numbered) tuples

    Returns
    -------
    List[Union[float, str]]
        The same list object as `a_list`, with its elements replaced by the
        (numbered) tuples or numbers they represent
    """
    # Slice assignment replaces the contents while keeping the list object,
    # so callers holding a reference to `a_list` see the converted values.
    a_list[:] = [string_to_tuple(element, from_list=True) for element in a_list]
    return a_list
def sanitize_yaml(d: dict, do_keys: bool = True, do_values: bool = False) -> dict:
    """Sanitize a yaml-returned dict for keys or (list) values containing tuples.

    Parameters
    ----------
    d : dict
        A dict resulting from parsing a pyglotaran model spec yml file
    do_keys : bool, optional
        Whether to run `sanitize_dict_keys` on `d`, by default True
    do_values : bool, optional
        Whether to run `sanitize_dict_values` on `d`, by default False

    Returns
    -------
    dict
        The dict `d` that was passed in, after the selected sanitizers have
        been applied to it
    """
    if do_keys:
        # Only the in-place changes matter here; the return value is not used.
        sanitize_dict_keys(d)
    if not do_values:
        return d
    # This is only needed to allow for tuple parsing in specification.
    sanitize_dict_values(d)
    return d