# Source code for layered_config_tree.main

"""
===================
Layered Config Tree
===================

A configuration structure that supports cascading layers.

Layered Config Tree allows base configurations to be overridden by multiple layers with
cascading priorities. The configuration values are presented as attributes of the
configuration object and are the value of the keys in the outermost layer of
configuration where they appear.

For example:

.. code-block:: python

    >>> config = LayeredConfigTree(layers=['inner_layer', 'middle_layer', 'outer_layer', 'user_overrides'])
    >>> config.update({'section_a': {'item1': 'value1', 'item2': 'value2'}, 'section_b': {'item1': 'value3'}}, layer='inner_layer')
    >>> config.update({'section_a': {'item1': 'value4'}, 'section_b': {'item1': 'value5'}}, layer='middle_layer')
    >>> config.update({'section_b': {'item1': 'value6'}}, layer='outer_layer')
    >>> config.section_a.item1
    'value4'
    >>> config.section_a.item2
    'value2'
    >>> config.section_b.item1
    'value6'

"""

from __future__ import annotations

from collections.abc import Iterable
from pathlib import Path
from typing import Any

from layered_config_tree import (
    ConfigurationError,
    ConfigurationKeyError,
    DuplicatedConfigurationError,
    ImproperAccessError,
    MissingLayerError,
)
from layered_config_tree.types import InputData
from layered_config_tree.utilities import load_yaml


class ConfigNode:
    """A single configuration value tracked across prioritized layers.

    A :class:`ConfigNode` stores at most one value per layer, together with
    the source that value came from. Layers are ordered from lowest to
    highest priority; when no layer is requested explicitly, the value from
    the highest-priority layer that has one is used.

    A typical use: a simulation sets required values at a "base" layer,
    components override them at a "component" layer, and a user overrides
    both at a "user" layer. Because every value and its source is retained,
    the provenance of the final value can be inspected via :attr:`metadata`.

    Setting a value twice at the same layer raises a
    :class:`~layered_config_tree.exceptions.DuplicatedConfigurationError`.

    This class should not be instantiated directly. All interaction should
    take place by manipulating a :class:`LayeredConfigTree` object.
    """

    def __init__(self, layers: list[str], name: str):
        """Initialize a ``ConfigNode``.

        Parameters
        ----------
        layers
            Ordered list of layer names from lowest to highest priority.
        name
            The name reported by this node in error messages and metadata.
        """
        self._name = name
        self._layers = layers
        # Maps layer name -> (source, value) for every layer that has been set.
        self._values: dict[str, tuple[str | None, Any]] = {}
        self._frozen = False
        self._accessed = False

    @property
    def name(self) -> str:
        """The name of this configuration value."""
        return self._name

    @property
    def accessed(self) -> bool:
        """Whether :meth:`get_value` has successfully returned a value."""
        return self._accessed

    @property
    def metadata(self) -> list[dict[str, Any | str | None]]:
        """All stored values with their layer and source, lowest priority first."""
        return [
            {
                "layer": layer,
                "source": self._values[layer][0],
                "value": self._values[layer][1],
            }
            for layer in self._layers
            if layer in self._values
        ]

    def freeze(self) -> None:
        """Make the ``ConfigNode`` read-only.

        After this call, :meth:`update` raises instead of storing a value.
        """
        self._frozen = True

    def get_value(self, layer: str | None = None) -> Any:
        """Return the value at the specified layer and mark the node accessed.

        If no layer is specified, the outermost (highest priority) layer at
        which a value has been set is used.

        Parameters
        ----------
        layer
            Name of the layer to retrieve the value from.

        Raises
        ------
        ConfigurationKeyError
            If no layer is specified and no value has been set at any layer.
        MissingLayerError
            If a layer is specified and no value is stored at that layer.
        """
        _, stored_value = self._get_value_with_source(layer)
        # Only reached when the lookup succeeded; failed lookups do not count
        # as an access.
        self._accessed = True
        return stored_value

    def update(self, value: Any, layer: str | None, source: str | None) -> None:
        """Set a value for a layer with optional metadata about source.

        Parameters
        ----------
        value
            Data to store in the node.
        layer
            Name of the layer to use. If no layer is provided, the value will
            be set in the outermost (highest priority) layer.
        source
            Metadata indicating the source of this value.

        Raises
        ------
        ConfigurationError
            If the node is frozen.
        ConfigurationKeyError
            If the provided layer does not exist.
        DuplicatedConfigurationError
            If a value has already been set at the provided layer or a value
            is already in the outermost layer and no layer has been provided.
        """
        if self._frozen:
            raise ConfigurationError(
                f"Frozen ConfigNode {self.name} does not support assignment.", self.name
            )

        # Any falsy layer (None or "") falls back to the highest-priority layer.
        target_layer = layer or self._layers[-1]

        if target_layer not in self._layers:
            raise ConfigurationKeyError(
                f"No layer {target_layer} in ConfigNode {self.name}.", self.name
            )

        if target_layer in self._values:
            # Report the value that is already stored, not the rejected one.
            existing_source, existing_value = self._values[target_layer]
            raise DuplicatedConfigurationError(
                f"Value has already been set at layer {target_layer}.",
                name=self.name,
                layer=target_layer,
                source=existing_source,
                value=existing_value,
            )

        self._values[target_layer] = (source, value)

    def _get_value_with_source(self, layer: str | None) -> tuple[str | None, Any]:
        """Return a (source, value) tuple at the specified layer.

        Parameters
        ----------
        layer
            Name of the layer to retrieve the (source, value) pair from.

        Notes
        -----
        No default value is ever returned here; a missing value always raises.

        Returns
        -------
            The (source, value) tuple at the specified layer or, if no layer
            is specified, at the outermost (highest priority) layer.

        Raises
        ------
        ConfigurationKeyError
            If no layer is specified and no value has been set at any layer.
        MissingLayerError
            If a layer is specified and no value is stored at that layer.
        """
        if layer is not None:
            if layer in self._values:
                return self._values[layer]
            # The value does not exist at the user-requested layer
            raise MissingLayerError(
                f"No value stored in this ConfigNode {self.name} at layer {layer}.",
                f"{self.name}.{layer}",
            )

        # Walk from highest to lowest priority and take the first hit.
        for candidate in reversed(self._layers):
            if candidate in self._values:
                return self._values[candidate]

        raise ConfigurationKeyError(
            f"No value stored in this ConfigNode {self.name}.", self.name
        )

    def __bool__(self) -> bool:
        """Return True if a value has been set at any layer."""
        return bool(self._values)

    def __repr__(self) -> str:
        """Return every stored value with its source, highest priority first."""
        return "\n".join(
            f"{entry['layer']}: {entry['value']}\n source: {entry['source']}"
            for entry in reversed(self.metadata)
        )

    def __str__(self) -> str:
        """Return the highest-priority layer and its value, or "" if empty."""
        if not self:
            return ""
        outermost = self.metadata[-1]
        return f"{outermost['layer']}: {outermost['value']}"
class ConfigIterator:
    """An iterator over the keys of a :class:`LayeredConfigTree`.

    The iterator is created from the tree's ``_children`` mapping at
    construction time and yields the child names in that mapping's order.
    """

    def __init__(self, config_tree: LayeredConfigTree):
        """Start iterating over the child names of ``config_tree``."""
        children = config_tree._children
        self._iterator = iter(children)

    def __iter__(self) -> ConfigIterator:
        """Return this object; it is its own iterator."""
        return self

    def __next__(self) -> str:
        """Return the next child name, raising ``StopIteration`` when exhausted."""
        return next(self._iterator)
class LayeredConfigTree:
    """A container for configuration information.

    Each configuration value is exposed as an attribute, the value of which
    is determined by the outermost layer which has the key defined. Children
    are either nested ``LayeredConfigTree`` objects (for dictionary data) or
    ``ConfigNode`` leaves (for everything else).
    """

    # Instance state is written directly into ``__dict__`` (bypassing the
    # custom ``__setattr__``), so the attribute types are declared here.
    _layers: list[str]
    _children: dict[str, LayeredConfigTree | ConfigNode]
    _frozen: bool
    _name: str

    def __init__(
        self,
        data: InputData | None = None,
        layers: list[str] | None = None,
        name: str = "",
    ):
        """Initialize a ``LayeredConfigTree``.

        Parameters
        ----------
        data
            The ``LayeredConfigTree`` accepts many kinds of data:

            - :class:`dict` : Flat or nested dictionaries may be provided.
              Keys of dictionaries at all levels must be strings.
            - ``LayeredConfigTree`` : Another ``LayeredConfigTree`` can be
              used. All source information will be ignored and the source
              will be set to ``"initial data"`` and values will be stored at
              the lowest priority level.
            - :class:`str` : Strings provided can be yaml formatted strings,
              which will be parsed into a dictionary using standard yaml
              parsing. Alternatively, a path to a yaml file may be provided
              and the file will be read in and parsed.
            - :class:`pathlib.Path` : A path object to a yaml file will be
              interpreted the same as a string representation.

            All values will be set with ``"initial data"`` as the source and
            will use the lowest priority level. If values are set at higher
            priorities they will be used when the ``LayeredConfigTree`` is
            accessed.
        layers
            A list of layer names. The order in which layers are defined
            determines their priority. Later layers override the values from
            earlier ones. If omitted or empty, a single ``"base"`` layer is
            used.
        name
            The name of the parent tree node that owns this tree. This is
            used for error messages and metadata.
        """
        # Copy the list so later mutation of the caller's list cannot change
        # this tree's layers (children already receive their own copies).
        self.__dict__["_layers"] = list(layers) if layers else ["base"]
        self.__dict__["_children"] = {}
        self.__dict__["_frozen"] = False
        self.__dict__["_name"] = name
        self.update(data, layer=self._layers[0], source="initial data")

    def freeze(self) -> None:
        """Convert the ``LayeredConfigTree`` to read only.

        This is useful for loading and then freezing configurations that
        should not be modified at runtime. Every child is frozen as well.
        """
        self.__dict__["_frozen"] = True
        for child in self.values():
            child.freeze()

    def items(self) -> Iterable[tuple[str, LayeredConfigTree | ConfigNode]]:
        """Return an iterable of all (child_name, child) pairs."""
        return self._children.items()

    def keys(self) -> Iterable[str]:
        """Return an Iterable of all child names."""
        return self._children.keys()

    def values(self) -> Iterable[LayeredConfigTree | ConfigNode]:
        """Return an Iterable of all children."""
        return self._children.values()

    def unused_keys(self) -> list[str]:
        """List all values in the ``LayeredConfigTree`` that haven't been accessed.

        Nested values are reported as dotted paths relative to this tree.
        """
        unused: list[str] = []
        for name, child in self.items():
            if isinstance(child, LayeredConfigTree):
                unused.extend(f"{name}.{nested}" for nested in child.unused_keys())
            elif not child.accessed:
                unused.append(name)
        return unused

    def to_dict(self) -> dict[str, Any]:
        """Convert the ``LayeredConfigTree`` to a nested dictionary.

        All metadata is lost in this conversion. Leaf values are read via
        ``get_value``, so every leaf is marked as accessed.
        """
        return {
            name: (
                child.to_dict()
                if isinstance(child, LayeredConfigTree)
                else child.get_value(layer=None)
            )
            for name, child in self.items()
        }

    def get(
        self, keys: str | list[str], default_value: Any = None, layer: str | None = None
    ) -> Any:
        """Return the value at the key or key path in the outermost layer.

        Parameters
        ----------
        keys
            The string or ordered list of strings to look for in the tree
            starting from the outermost layer. A list passed here is not
            modified.
        default_value
            The value to return if and only if the *final* key in the key
            path does not exist.
        layer
            The name of the layer to retrieve the value from.

        Notes
        -----
        The ``default_value`` will only be used if the *final* key in the key
        path does not exist *but the rest of the key path does*.

        Returns
        -------
            The value at the key or nested keys and at the requested layer
            (the outermost, by default). If the final key names a subtree,
            that ``LayeredConfigTree`` is returned. ``default_value`` (None,
            by default) is returned if the final key does not exist.

        Raises
        ------
        TypeError
            If the ``keys`` parameter is not a string or a list of strings.
        IndexError
            If ``keys`` is an empty list.
        """
        if not isinstance(keys, (str, list)):
            raise TypeError("The 'keys' parameter must be a string or a list of strings.")

        if isinstance(keys, str):
            if keys not in self._children:
                return default_value
            child = self._children[keys]
            if isinstance(child, LayeredConfigTree):
                return child
            return child.get_value(layer=layer)

        # Split by slicing rather than ``pop`` so the caller's list is left
        # untouched. Everything before the final key must resolve to a tree.
        final_key = keys[-1]
        parent_tree = self.get_tree(keys[:-1])
        return parent_tree.get(final_key, default_value=default_value, layer=layer)

    def get_tree(self, keys: str | list[str]) -> LayeredConfigTree:
        """Return the ``LayeredConfigTree`` at the key or key path from the outermost layer.

        Parameters
        ----------
        keys
            The key or key path to look up from the outermost layer.

        Returns
        -------
            The ``LayeredConfigTree`` located at the key or key path provided
            starting from the outermost layer. An empty list returns this
            tree itself.

        Raises
        ------
        TypeError
            If the ``keys`` parameter is not a string or list of strings.
        ConfigurationKeyError
            If any of the keys in the key path do not exist in the tree,
            including when an intermediate key resolves to something other
            than a ``LayeredConfigTree``.
        ConfigurationError
            If the data at the final key in the key path is not a
            ``LayeredConfigTree``.
        """
        if not isinstance(keys, (str, list)):
            raise TypeError("The 'keys' parameter must be a string or a list of strings.")

        path = [keys] if isinstance(keys, str) else keys

        tree: Any = self
        for depth, key in enumerate(path):
            # ``depth`` (not ``path.index(key)``) keeps the reported prefix
            # correct when the same key appears more than once in the path.
            # A non-tree intermediate value cannot contain further keys.
            if not isinstance(tree, LayeredConfigTree) or key not in tree:
                raise ConfigurationKeyError(
                    f"No value at key mapping '{path[:depth + 1]}'."
                )
            tree = tree[key]

        if not isinstance(tree, LayeredConfigTree):
            raise ConfigurationError(
                f"The data you accessed using {path} with get_tree was of type {type(tree)}, "
                "but get_tree must return a LayeredConfigTree."
            )
        return tree

    def update(
        self,
        data: InputData | None,
        layer: str | None = None,
        source: str | None = None,
    ) -> None:
        """Add additional data into the ``LayeredConfigTree``.

        Parameters
        ----------
        data
            The data used to update the ``LayeredConfigTree``. ``None`` is a
            no-op.

            - :class:`dict` : Flat or nested dictionaries may be provided.
              Keys of dictionaries at all levels must be strings.
            - :class:`str` : Strings provided can be yaml formatted strings,
              which will be parsed into a dictionary using standard yaml
              parsing. Alternatively, a path to a yaml file may be provided
              and the file will be read in and parsed.
            - :class:`pathlib.Path` : A path object to a yaml file will be
              interpreted the same as a string representation.
            - ``LayeredConfigTree`` : Another ``LayeredConfigTree`` can be
              used. All source information will be ignored and the provided
              layer and source will be used to set the metadata.
        layer
            The name of the layer to store the value in. If no layer is
            provided, the value will be set in the outermost (highest
            priority) layer.
        source
            The source to attribute the value to.

        Raises
        ------
        ConfigurationError
            If the ``LayeredConfigTree`` is frozen or attempting to assign an
            invalid value.
        ConfigurationKeyError
            If the provided layer does not exist.
        DuplicatedConfigurationError
            If a value has already been set at the provided layer or a value
            is already in the outermost layer and no layer has been provided.
        """
        if data is None:
            return
        data_dict, source = self._coerce(data, source)
        for key, value in data_dict.items():
            self._set_with_metadata(key, value, layer, source)

    def metadata(self, name: str) -> list[dict[str, Any]]:
        """Return all values and associated metadata for the named child.

        Parameters
        ----------
        name
            The name of the child to retrieve metadata for.

        Returns
        -------
            A list of dictionaries, each containing 'layer', 'source', and
            'value' keys for every layer at which the child has a value set.

        Raises
        ------
        ConfigurationKeyError
            If no child with the given name exists.
        """
        if name in self:
            # NOTE(review): when the named child is itself a LayeredConfigTree,
            # this attribute access yields that subtree's bound ``metadata``
            # method rather than a list - confirm whether that is intended.
            return self._children[name].metadata  # type: ignore[return-value]
        name = f"{self._name}.{name}" if self._name else name
        raise ConfigurationKeyError(f"No configuration value with name {name}", name)

    @staticmethod
    def _coerce(
        data: InputData,
        source: str | None,
    ) -> tuple[dict[str, Any], str | None]:
        """Coerce input data into dictionary format.

        Parameters
        ----------
        data
            The input data to coerce. Accepts dictionaries,
            ``LayeredConfigTree`` objects, YAML strings, and file paths.
        source
            The source to attribute the data to. If ``data`` is a string or
            Path and ``source`` is falsy, the string representation of
            ``data`` is used as the source.

        Returns
        -------
            A tuple of (data_dict, source).

        Raises
        ------
        ConfigurationError
            If ``data`` is not a supported type.
        """
        if isinstance(data, dict):
            return data, source
        if isinstance(data, LayeredConfigTree):
            return data.to_dict(), source
        if isinstance(data, (str, Path)):
            source = source if source else str(data)
            return load_yaml(data), source
        raise ConfigurationError(
            f"LayeredConfigTree can only update from dictionaries, strings, paths, and LayeredConfigTrees. "
            f"You passed in {type(data)}",
            value_name=None,
        )

    def _set_with_metadata(
        self,
        name: str,
        value: Any,
        layer: str | None,
        source: str | None,
    ) -> None:
        """Set a value in the named layer with the given source.

        Dictionary values are stored in a nested ``LayeredConfigTree`` child;
        all other values are stored in a ``ConfigNode`` child. A missing
        child is created on demand.

        Parameters
        ----------
        name
            The name of the value.
        value
            The value to store.
        layer
            The name of the layer to store the value in. If no layer is
            provided, the value will be set in the outermost (highest
            priority) layer.
        source
            The source to attribute the value to.

        Raises
        ------
        ConfigurationError
            If the ``LayeredConfigTree`` is frozen or attempting to assign an
            invalid value.
        ConfigurationKeyError
            If the provided layer does not exist.
        DuplicatedConfigurationError
            If a value has already been set at the provided layer or a value
            is already in the outermost layer and no layer has been provided.
        """
        if self._frozen:
            raise ConfigurationError(
                f"Frozen LayeredConfigTree {self._name} does not support assignment.",
                self._name,
            )

        child = self._children.get(name)
        if isinstance(value, dict):
            if child is None:
                child = LayeredConfigTree(layers=list(self._layers), name=name)
                self._children[name] = child
            elif not isinstance(child, LayeredConfigTree):
                # An existing leaf cannot be turned into a subtree.
                name = f"{self._name}.{name}" if self._name else name
                raise ConfigurationError(
                    f"Can't assign a dictionary as a value to a ConfigNode.", name
                )
        else:
            if child is None:
                child = ConfigNode(list(self._layers), name=self._name)
                self._children[name] = child
            elif isinstance(child, LayeredConfigTree):
                # An existing subtree cannot be replaced by a plain value.
                name = f"{self._name}.{name}" if self._name else name
                raise ConfigurationError(
                    f"Can't assign a value to a LayeredConfigTree.", name
                )

        child.update(value, layer, source)

    def __setattr__(self, name: str, value: Any) -> None:
        """Set a value on the outermost layer.

        Notes
        -----
        We allow keys that look like dunder attributes, i.e. start and end
        with "__". However, to avoid conflict with actual dunder methods and
        attributes, we do not allow setting them via this method and instead
        require dictionary access (i.e. bracket notation).

        Raises
        ------
        ConfigurationKeyError
            If ``name`` is not an existing key.
        ImproperAccessError
            If ``name`` is an existing key that starts and ends with "__".
        """
        if name not in self:
            raise ConfigurationKeyError(
                "New configuration keys can only be created with the update method.",
                self._name,
            )
        if name.startswith("__") and name.endswith("__"):
            raise ImproperAccessError(
                "Cannot set an attribute starting and ending with '__' via attribute "
                "access (i.e. dot notation). Use dictionary access instead "
                "(i.e. bracket notation)."
            )
        self._set_with_metadata(name, value, layer=None, source=None)

    def __setitem__(self, name: str, value: Any) -> None:
        """Set a value on the outermost layer.

        Raises
        ------
        ConfigurationKeyError
            If ``name`` is not an existing key.
        """
        if name not in self:
            raise ConfigurationKeyError(
                "New configuration keys can only be created with the update method.",
                self._name,
            )
        self._set_with_metadata(name, value, layer=None, source=None)

    # FIXME: We expect the return to be a ConfigNode or LayeredConfigTree but
    #   static type checkers don't know what you're getting back in chained
    #   attribute calls. We type hint returning Any as a workaround.
    def __getattr__(self, name: str) -> Any:
        """Get a value from the outermost layer in which it appears.

        Notes
        -----
        We allow keys that look like dunder attributes, i.e. start and end
        with "__". However, to avoid conflict with actual dunder methods and
        attributes, we do not allow getting them via this method and instead
        require dictionary access (i.e. bracket notation).

        If the requested attribute starts and ends with "__" but *does not*
        actually exist, it is critical that we raise an AttributeError since
        some functions specifically handle it, e.g. ``pickle`` and
        ``copy.deepcopy``. See https://stackoverflow.com/a/50888571/

        On the other hand, if the requested attribute starts and ends with
        "__" and *does* exist, it is critical that we raise a
        non-AttributeError exception so as not to conflict with dunder
        methods and attributes.

        Raises
        ------
        AttributeError
            If the requested attribute looks like a dunder and does not exist.
        ImproperAccessError
            If the requested attribute looks like a dunder and does exist.
        ConfigurationKeyError
            If the requested attribute does not exist.
        """
        if name.startswith("__") and name.endswith("__"):
            if name not in self:
                raise AttributeError  # Do not change from AttributeError
            raise ImproperAccessError(
                "Cannot get an attribute starting and ending with '__' via attribute "
                "access (i.e. dot notation). Use dictionary access instead "
                "(i.e. bracket notation)."
            )
        return self[name]

    # We need custom definitions of __getstate__ and __setstate__
    # because of our custom attribute getters/setters.
    # Specifically:
    #  * The pickle module will invoke our __getattr__ checking for __getstate__
    #    and __setstate__, and only catch AttributeError (not ConfigurationKeyError), and
    #  * Calling __getattr__ before we have set up the state doesn't work,
    #    because it leads to an infinite loop looking for the module's
    #    actual attributes (not config keys)
    def __getstate__(self) -> dict[str, Any]:
        """Return picklable state.

        Notes
        -----
        Custom definitions of ``__getstate__`` and ``__setstate__`` are needed
        because the custom ``__getattr__`` and ``__setattr__`` interfere with
        the default pickle protocol.
        """
        return self.__dict__

    def __setstate__(self, state: dict[str, Any]) -> None:
        """Restore state from a pickle."""
        # Write into ``__dict__`` directly; ``__setattr__`` only accepts
        # existing configuration keys.
        self.__dict__.update(state)

    def __getitem__(self, name: str) -> Any:
        """Get a value from the outermost layer in which it appears.

        Raises
        ------
        ConfigurationKeyError
            If the requested key does not exist.
        """
        if name not in self:
            name = f"{self._name}.{name}" if self._name else name
            raise ConfigurationKeyError(f"No value at name {name}.", name)
        return self.get(name)

    def __delattr__(self, name: str) -> None:
        """Delete a child by name using attribute syntax; unknown names are ignored."""
        if name in self:
            del self._children[name]

    def __delitem__(self, name: str) -> None:
        """Delete a child by name using bracket syntax; unknown names are ignored."""
        if name in self:
            del self._children[name]

    def __contains__(self, name: str) -> bool:
        """Test if a configuration key exists in any layer."""
        return name in self._children

    def __iter__(self) -> ConfigIterator:
        """Dictionary-like iteration."""
        return ConfigIterator(self)

    def __len__(self) -> int:
        """Return the number of children in the tree."""
        return len(self._children)

    def __dir__(self) -> list[str]:
        """List child names along with standard object attributes."""
        return list(self._children.keys()) + dir(super())

    def __repr__(self) -> str:
        """Return a detailed multi-line string of all children and their values."""
        return "\n".join(
            "{}:\n {}".format(name, repr(child).replace("\n", "\n "))
            for name, child in self._children.items()
        )

    def __str__(self) -> str:
        """Return a human-readable summary of all children at their outermost layer."""
        return "\n".join(
            "{}:\n {}".format(name, str(child).replace("\n", "\n "))
            for name, child in self._children.items()
        )

    def __eq__(self, other: object) -> bool:
        """Equality comparison is not supported.

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError