"""Functions to work with context (dictionary)."""
import copy
import lena.core
# pylint: disable=invalid-name
# d is a good name for dictionary,
# used in Python documentation for dict.
def contains(d, s):
    """Check that a dictionary *d* contains a subdictionary
    defined by a string *s*.

    True if *d* contains a subdictionary that is represented by *s*.
    Dots in *s* mean nested subdictionaries.
    A string without dots means a key in *d*.

    Example:

    >>> d = {'fit': {'coordinate': 'x'}}
    >>> contains(d, "fit")
    True
    >>> contains(d, "fit.coordinate.x")
    True
    >>> contains(d, "fit.coordinate.y")
    False

    If the most nested element of *d* to be compared with *s*
    is not a string, its string representation is used for comparison.

    If an intermediate part of *s* leads to a value
    that is not a dictionary, while more parts of *s* remain,
    ``False`` is returned:

    >>> contains({'dim': 1}, "dim.1")
    True
    >>> contains({'dim': 1}, "dim.1.x")
    False

    See also :func:`str_to_dict`.
    """
    # todo: s can be a list, or a dict?
    levels = s.split(".")
    if len(levels) < 2:
        return s in d

    subdict = d
    for key in levels[:-1]:
        # A non-dict value can not be descended further.
        # Without this check a string value would be tested
        # for a substring and then indexed (raising TypeError),
        # and a number would raise TypeError on "in".
        if not isinstance(subdict, dict) or key not in subdict:
            return False
        subdict = subdict[key]

    last_val = levels[-1]
    if isinstance(subdict, dict):
        return last_val in subdict

    # just a value
    try:
        # it's better to test for an object to be cast to str
        # than to disallow "dim.1"
        subd = str(subdict)
    except Exception:
        # deliberate best effort: an object that can't be
        # represented as a string simply doesn't match
        return False
    return subd == last_val
def difference(d1, d2, level=-1):
    """Return a dictionary with items from *d1* not contained in *d2*.

    *level* sets the maximum depth of recursion. For infinite recursion,
    set that to -1. For level 1,
    if a key is present both in *d1* and *d2* but has different values,
    it is included into the difference.
    See :func:`intersection` for more details.

    If a key is present in both dictionaries and at least one
    of its values is not a dictionary, then the item from *d1*
    is included whenever the values differ
    (also when the value in *d1* is ``0``, ``None``,
    an empty string or another false value).
    Nested dictionaries are compared recursively, and a key
    is omitted if its nested difference is empty.

    Example:

    >>> difference({"a": 0, "b": {"c": 1, "d": 2}}, {"a": 1, "b": {"c": 1}})
    {'a': 0, 'b': {'d': 2}}

    *d1* and *d2* remain unchanged. However, *d1* or some of its
    subdictionaries may be returned directly.
    Make a deep copy of the result when appropriate.

    .. versionadded:: 0.5
       add keyword argument *level*.
    """
    # can become not dicts during the recursion
    if not isinstance(d1, dict) or not isinstance(d2, dict):
        return d1

    if d1 == d2:
        return {}
    if level == 0:
        return d1

    # some keys differ
    result = {}
    for key in d1:
        if key not in d2:
            result[key] = d1[key]
        elif d1[key] != d2[key]:
            if isinstance(d1[key], dict) and isinstance(d2[key], dict):
                res = difference(d1[key], d2[key], level-1)
                # if d2[key] contains all d1[key] elements,
                # the difference will be empty
                if res:
                    result[key] = res
            else:
                # Plain values that differ always belong to the
                # difference. They must not be tested for truth,
                # otherwise 0, None, "" etc. would be lost.
                result[key] = d1[key]
    return result
def format_context(format_str):
    """Create a function that formats a given string using a context.

    It is recommended to use jinja2.Template.
    Use this function only if you don't have jinja2.

    *format_str* is a Python format string with double braces
    instead of single ones.
    It must contain all non-empty replacement fields,
    and only simplest formatting without attribute lookup.

    Example:

    >>> f = format_context("{{x}}")
    >>> f({"x": 10})
    '10'

    When calling *format_context*, arguments are bound and
    a new function is returned. When called with a context,
    its keys are extracted and formatted in *format_str*.

    Keys can be nested using a dot, for example:

    >>> f = format_context("{{x.y}}_{{z}}")
    >>> f({"x": {"y": 10}, "z": 1})
    '10_1'

    This function does not work with unbalanced braces.
    If a simple check fails, :exc:`.LenaValueError` is raised.
    In particular, it is raised when the numbers of opening and
    closing braces differ, when single braces are used
    or when a replacement field is opened but never closed.
    If *format_str* is not a string, :exc:`.LenaTypeError` is raised.
    All other errors are raised only during formatting.
    If context doesn't contain the needed key,
    :exc:`.LenaKeyError` is raised.
    Note that string formatting can also raise a :exc:`ValueError`,
    so it is recommended to test your formatters before using them.
    """
    if not isinstance(format_str, str):
        raise lena.core.LenaTypeError(
            "format_str must be a string, {} given".format(format_str)
        )

    # prohibit single or unbalanced braces
    if format_str.count('{') != format_str.count('}'):
        raise lena.core.LenaValueError(
            "unbalanced braces in '{}'".format(format_str)
        )
    if '{' in format_str and '{{' not in format_str:
        raise lena.core.LenaValueError(
            "double braces must be used for formatting instead of '{}'"
            .format(format_str)
        )

    # double braces are converted to single ones,
    # after that the usual str.format syntax is parsed.
    template = format_str.replace("{{", "{").replace("}}", "}")
    length = len(template)

    # pieces make a format string with positional (empty) fields,
    # field_names store the context keys in the order of appearance.
    pieces = []
    field_names = []
    pos = 0
    while pos < length:
        char = template[pos]
        pieces.append(char)
        pos += 1
        if char != '{':
            # literal text, or the rest of a field
            # (closing brace, conversion, format specification)
            continue

        # several opening braces in a row are copied as they are
        while pos < length and template[pos] == '{':
            pieces.append('{')
            pos += 1

        # the field name lasts until a closing brace,
        # a conversion ("!") or a format specification (":").
        # It is dropped from the output: the field becomes positional.
        start = pos
        while pos < length and template[pos] not in '}!:':
            pos += 1
        if pos == length:
            # the string ended within a field (for example "}}{{x").
            # Counting braces does not catch that.
            raise lena.core.LenaValueError(
                "replacement field is not closed in '{}'".format(format_str)
            )
        field_names.append(template[start:pos])
        # the delimiter itself is copied on the next iteration

    positional_str = ''.join(pieces)

    def _format_context(context):
        # LenaKeyError may be raised
        values = [lena.context.get_recursively(context, name)
                  for name in field_names]
        # other exceptions, like ValueError
        # (for bad string formatting) may be raised.
        return positional_str.format(*values)

    return _format_context
_sentinel = object()


def get_recursively(d, keys, default=_sentinel):
    """Get value from a dictionary *d* recursively.

    *keys* describes a path through nested dictionaries.
    It can be a list of simple keys (strings),
    a dot-separated string
    or a dictionary with at most one key at each level.
    A string is split by dots (empty parts are skipped)
    and used as a list. A dictionary is unrolled into a list
    of its nested keys, a true non-dictionary value at its
    deepest level becomes the last key.

    If some part of the path is not found in *d*,
    *default* is returned if it was given,
    otherwise :exc:`.LenaKeyError` is raised.
    If *keys* is empty, *d* is returned.

    Examples:

    >>> context = {"output": {"latex": {"name": "x"}}}
    >>> get_recursively(context, ["output", "latex", "name"], default="y")
    'x'
    >>> get_recursively(context, "output.latex.name")
    'x'

    .. note::
        Python's dict.get in case of a missing value
        returns ``None`` and never raises an error.
        We implement it differently,
        because it allows more flexibility.

    If *d* is not a dictionary or if *keys* is not a string, a dict
    or a list, :exc:`.LenaTypeError` is raised.
    It is also raised if *keys* is a list with non-string elements.
    If *keys* is a dictionary with more than one key at some level,
    :exc:`.LenaValueError` is raised.
    """
    has_default = default is not _sentinel
    if not isinstance(d, dict):
        raise lena.core.LenaTypeError(
            "need a dictionary, {} provided".format(d)
        )

    # convert all allowed kinds of keys to a list *path*
    if isinstance(keys, str):
        # here empty substrings are skipped, but this is undefined.
        path = [part for part in keys.split('.') if part]
    # todo: create dict_to_list and disable dict keys here?
    elif isinstance(keys, dict):
        path = []
        node = keys
        # a false node (an empty dict, "", None, 0) ends the path
        while node:
            if not isinstance(node, dict):
                # a plain value at the deepest level is the last key
                path.append(node)
                break
            if len(node) != 1:
                raise lena.core.LenaValueError(
                    "keys must have exactly one key at each level, "
                    "{} given".format(node)
                )
            only_key = next(iter(node))
            path.append(only_key)
            node = node[only_key]
    elif isinstance(keys, list):
        for simple_key in keys:
            if not isinstance(simple_key, str):
                raise lena.core.LenaTypeError(
                    "all simple keys must be strings, "
                    "{} given".format(keys)
                )
        path = keys
    else:
        raise lena.core.LenaTypeError(
            "keys must be a dict, a string or a list of keys, "
            "{} given".format(keys)
        )

    if not path:
        return d

    # all keys except the last one must lead to nested dictionaries
    current = d
    for name in path[:-1]:
        nested = current.get(name) if name in current else None
        if isinstance(nested, dict):
            current = nested
            continue
        if has_default:
            return default
        raise lena.core.LenaKeyError(
            "nested dict {} not found in {}".format(name, current)
        )

    last = path[-1]
    if last in current:
        return current[last]
    if has_default:
        return default
    raise lena.core.LenaKeyError(
        "nested key {} not found in {}".format(last, current)
    )
def intersection(*dicts, **kwargs):
    """Return a dictionary, such that each of its items
    are contained in all *dicts* (recursively).

    *dicts* are several dictionaries.
    If *dicts* is empty, an empty dictionary is returned.

    A keyword argument *level* sets maximum number of recursions.
    For example, if *level* is 0, all *dicts* must be equal
    (otherwise an empty dict is returned).
    If *level* is 1, the result contains those subdictionaries
    which are equal.
    For arbitrarily nested subdictionaries set *level* to -1 (default).

    Example:

    >>> from lena.context import intersection
    >>> d1 = {1: "1", 2: {3: "3", 4: "4"}}
    >>> d2 = {2: {4: "4"}}
    >>> # by default level is -1, which means infinite recursion
    >>> intersection(d1, d2) == d2
    True
    >>> intersection(d1, d2, level=0)
    {}
    >>> intersection(d1, d2, level=1)
    {}
    >>> intersection(d1, d2, level=2)
    {2: {4: '4'}}

    This function always returns a dictionary
    or its subtype (copied from dicts[0]).
    All values are deeply copied.
    No dictionary or subdictionary is changed.

    If any of *dicts* is not a dictionary
    or if some *kwargs* are unknown,
    :exc:`.LenaTypeError` is raised.
    """
    if any(not isinstance(d, dict) for d in dicts):
        raise lena.core.LenaTypeError(
            "all dicts must be dictionaries, "
            "{} given".format(dicts)
        )

    level = kwargs.pop("level", -1)
    if kwargs:
        raise lena.core.LenaTypeError(
            "unknown kwargs {}".format(kwargs)
        )

    if not dicts:
        return {}

    # the first dictionary is copied and then narrowed
    # by each of the following ones
    common = copy.deepcopy(dicts[0])
    for other in dicts[1:]:
        if level == 0:
            # no recursion allowed: only whole (non-empty) equal
            # dictionaries intersect
            if other == common and other:
                continue
            return {}

        # keys can't be deleted during iteration
        stale = []
        for key in common:
            if key not in other:
                stale.append(key)
            elif other[key] != common[key]:
                can_descend = (
                    level != 1
                    and isinstance(common[key], dict)
                    and isinstance(other[key], dict)
                )
                if can_descend:
                    common[key] = intersection(
                        common[key], other[key], level=level-1
                    )
                else:
                    stale.append(key)
        for key in stale:
            del common[key]

        if not common:
            # the intersection is already empty,
            # no need to look at other dictionaries
            return common
    return common
def iterate_update(d, updates):
    """Yield copies of *d*, each updated with one of the *updates*.

    *d* is a dictionary. It is never changed:
    every yielded value is a separate deep copy of *d*
    updated with :func:`update_recursively`.

    *updates* is a list of dictionaries.
    One dictionary is yielded for each of its elements,
    in the same order.
    If *updates* is empty, nothing is yielded.
    """
    # todo: do I need this function?
    for patch in updates:
        updated = copy.deepcopy(d)
        update_recursively(updated, patch)
        yield updated
def make_context(obj, *attrs):
    """Return context for object *obj*.

    *attrs* are names of attributes of *obj* to be inserted
    into the context (a new dictionary).
    If a name starts with an underscore '_',
    the corresponding key is the name without
    that first underscore.
    Attributes that are absent or equal to ``None`` are skipped.
    """
    # todo: rename to to_dict
    # not used anywhere, change it freely.
    # add examples.
    context = {}
    for attr in attrs:
        val = getattr(obj, attr, None)
        if val is None:
            continue
        name = attr[1:] if attr.startswith("_") else attr
        context[name] = val
    return context
def str_to_dict(s, value=_sentinel):
    """Create a dictionary from a dot-separated string *s*.

    Dots represent nested dictionaries.
    If the *value* is provided, it becomes the value of
    the deepest key represented by *s*.
    Otherwise the last part of *s* is used as the value.

    If *s* is non-empty and *value* is not provided,
    then *s* must have at least two dot-separated parts
    (*"a.b"*), otherwise :exc:`.LenaValueError` is raised.
    If a *value* is provided, *s* must be non-empty
    (or :exc:`.LenaValueError` is raised).
    If *s* is empty and no *value* is given,
    an empty dictionary is returned.

    Examples:

    >>> str_to_dict("a.b.c d")
    {'a': {'b': 'c d'}}
    >>> str_to_dict("output.changed", True)
    {'output': {'changed': True}}
    """
    if s == "":
        if value is not _sentinel:
            raise lena.core.LenaValueError(
                "to make a dict with a value, "
                "provide at least one dot-separated key"
            )
        return {}

    # There was an idea to allow *s* to be a dictionary
    # (returned as it is), but then a value could not be allowed.
    # This looked like a bad design and was rejected.
    chain = s.split(".")
    if value is not _sentinel:
        chain.append(value)

    if len(chain) < 2:
        raise lena.core.LenaValueError(
            "to make a dict, provide at least two dot-separated values"
        )

    # the last element is the most nested value,
    # it is wrapped into dictionaries from the inside out
    nested = chain[-1]
    for name in reversed(chain[:-1]):
        nested = {name: nested}
    return nested
def str_to_list(s):
    """Like :func:`str_to_dict`, but return a flat list.

    The string *s* is split by dots.
    If *s* is empty, an empty list is returned.
    This is different from *str.split*: the latter would
    return a list with one empty string.

    Contrarily to :func:`str_to_dict`, this function allows
    an arbitrary number of dots in *s* (or none).
    """
    # s can't be a list. This function is not used as a general
    # interface (as str_to_dict could be).

    # s may contain empty substrings, like in "a..b".
    # This is not encouraged, of course, but may suit:
    # if there are two errors in some user's context logic,
    # they may compensate and not destroy all.
    # Another variant would be to treat empty strings
    # as whole context. The variant with '' seems more understandable
    # to the user.
    return [] if s == "" else s.split(".")
def update_nested(key, d, other):
    """Update *d[key]* with the *other* dictionary preserving data.

    If *d* doesn't contain the *key*, it is updated with *{key: other}*.
    If *d* contains the *key*, *d[key]* is inserted into *other[key]*
    (so that it is not overriden).
    If *other* contains *key* (and possibly more nested *key*-s),
    then *d[key]* is inserted into the deepest level
    of *other.key.key...* Finally, *d[key]* becomes *other*.
    Nothing is returned.

    Example:

    >>> context = {"variable": {"name": "x"}}
    >>> new_var_context = {"name": "n"}
    >>> update_nested("variable", context, copy.deepcopy(new_var_context))
    >>> context == {'variable': {'name': 'n', 'variable': {'name': 'x'}}}
    True
    >>>
    >>> update_nested("variable", context, {"name": "top"})
    >>> context == {
    ...    'variable': {'name': 'top',
    ...                 'variable': {'name': 'n', 'variable': {'name': 'x'}}}
    ... }
    True

    *other* is modified in general. Create that on the fly
    or use *copy.deepcopy* when appropriate.

    Recursive dictionaries (containing references to themselves)
    are strongly discouraged and meaningless when nesting.
    If *other[key]* is recursive, :exc:`.LenaValueError` may be raised.
    """
    # there was an idea to add a keyword argument copy_other
    # (by default True), but the user can do that him/herself
    # with copy.deepcopy when needed. Otherwise it would be
    # unnecessary complication of this interface.

    # Only one key is nested. This encourages design when
    # 1) elements combine their contexts into one key
    # (like {"split_into_bins": {"variable": {}, "histogram": {}}})
    # 2) elements change only one key ("variable", "histogram",...).

    if key in d:
        # follow other[key][key]... until a level without the key
        innermost = other
        visited = []
        while key in innermost:
            if innermost in visited:
                # we have returned to a level that was already passed
                raise lena.core.LenaValueError(
                    "recursive *other* is forbidden"
                )
            visited.append(innermost)
            innermost = innermost[key]
        # insert d[key] at the lowest other.key.key....
        innermost[key] = d[key]

    d[key] = other
def update_recursively(d, other, value=_sentinel):
    """Update dictionary *d* with items from *other* dictionary.

    *d* is changed in place, nothing is returned.

    *other* can be a dot-separated string. In this case
    :func:`str_to_dict` is used to convert it and the *value*
    to a dictionary.
    A *value* argument is allowed only when *other* is a string,
    otherwise :exc:`.LenaValueError` is raised.

    Existing values are updated recursively,
    that is including nested subdictionaries.
    Example:

    >>> d1 = {"a": 1, "b": {"c": 3}}
    >>> d2 = {"b": {"d": 4}}
    >>> update_recursively(d1, d2)
    >>> d1 == {'a': 1, 'b': {'c': 3, 'd': 4}}
    True
    >>> # Usual update would have made d1["b"] = {"d": 4}, erasing "c".

    Non-dictionary items from *other* overwrite those in *d*:

    >>> update_recursively(d1, {"b": 2})
    >>> d1 == {'a': 1, 'b': 2}
    True

    A subdictionary of *other* under a key missing in *d*
    is inserted into *d* as it is (not copied).

    Both *d* and *other* (after a conversion from a string)
    must be dictionaries, otherwise :exc:`.LenaTypeError` is raised.
    """
    # it would be cleaner to allow only dict as other,
    # but it's very clear and useful to allow
    # lena.context.update_recursively(context, "output.changed", True)
    if isinstance(other, str):
        other = str_to_dict(other, value)
    elif value is not _sentinel:
        raise lena.core.LenaValueError(
            "explicit value is allowed only when other is a string"
        )

    if not (isinstance(d, dict) and isinstance(other, dict)):
        raise lena.core.LenaTypeError(
            "d and other must be dicts, {} and {} provided".format(d, other)
        )

    for key, val in other.items():
        # plain values and completely new keys are simply set
        if not isinstance(val, dict) or key not in d:
            d[key] = val
            continue
        # val is a dictionary to be merged into d[key];
        # a non-dictionary d[key] is replaced by a new dictionary first
        if not isinstance(d[key], dict):
            d[key] = {}
        update_recursively(d[key], val)