Source code for lena.context.functions

"""Functions to work with context (dictionary)."""

import copy

import lena.core

# pylint: disable=invalid-name
# d is a good name for dictionary,
# used in Python documentation for dict.


[docs]def contains(d, s): """Check that a dictionary *d* contains a subdictionary defined by a string *s*. True if *d* contains a subdictionary that is represented by *s*. Dots in *s* mean nested subdictionaries. A string without dots means a key in *d*. Example: >>> d = {'fit': {'coordinate': 'x'}} >>> contains(d, "fit") True >>> contains(d, "fit.coordinate.x") True >>> contains(d, "fit.coordinate.y") False If the most nested element of *d* to be compared with *s* is not a string, its string representation is used for comparison. See also :func:`str_to_dict`. """ # todo: s can be a list, or a dict? levels = s.split(".") if len(levels) < 2: return s in d subdict = d for key in levels[:-1]: if key not in subdict: return False subdict = subdict[key] last_val = levels[-1] if isinstance(subdict, dict): return last_val in subdict else: # just a value try: # it's better to test for an object to be cast to str # than to disallow "dim.1" subd = str(subdict) except Exception: return False else: return subd == last_val
[docs]def difference(d1, d2, level=-1): """Return a dictionary with items from *d1* not contained in *d2*. *level* sets the maximum depth of recursion. For infinite recursion, set that to -1. For level 1, if a key is present both in *d1* and *d2* but has different values, it is included into the difference. See :func:`intersection` for more details. *d1* and *d2* remain unchanged. However, *d1* or some of its subdictionaries may be returned directly. Make a deep copy of the result when appropriate. .. versionadded:: 0.5 add keyword argument *level*. """ # can become not dicts during the recursion if not isinstance(d1, dict) or not isinstance(d2, dict): return d1 if d1 == d2: return {} elif level == 0: return d1 # some keys differ result = {} for key in d1: if key not in d2: result[key] = d1[key] elif d1[key] != d2[key]: res = difference(d1[key], d2[key], level-1) # if d2[key] contains all d1[key] elements, # the difference will be empty if res: result[key] = res return result
[docs]def format_context(format_str): """Create a function that formats a given string using a context. It is recommended to use jinja2.Template. Use this function only if you don't have jinja2. *format_str* is a Python format string with double braces instead of single ones. It must contain all non-empty replacement fields, and only simplest formatting without attribute lookup. Example: >>> f = format_context("{{x}}") >>> f({"x": 10}) '10' When calling *format_context*, arguments are bound and a new function is returned. When called with a context, its keys are extracted and formatted in *format_str*. Keys can be nested using a dot, for example: >>> f = format_context("{{x.y}}_{{z}}") >>> f({"x": {"y": 10}, "z": 1}) '10_1' This function does not work with unbalanced braces. If a simple check fails, :exc:`.LenaValueError` is raised. If *format_str* is not a string, :exc:`.LenaTypeError` is raised. All other errors are raised only during formatting. If context doesn't contain the needed key, :exc:`.LenaKeyError` is raised. Note that string formatting can also raise a :exc:`ValueError`, so it is recommended to test your formatters before using them. """ if not isinstance(format_str, str): raise lena.core.LenaTypeError( "format_str must be a string, {} given".format(format_str) ) # prohibit single or unbalanced braces if format_str.count('{') != format_str.count('}'): raise lena.core.LenaValueError("unbalanced braces in '{}'".format(format_str)) if '{' in format_str and not '{{' in format_str: raise lena.core.LenaValueError( "double braces must be used for formatting instead of '{}'" .format(format_str) ) # new format: now double braces instead of single ones. # but the algorithm may be left unchanged. format_str = format_str.replace("{{", "{").replace("}}", "}") new_str = [] new_args = [] prev_char = '' ind = 0 within_field = False while ind < len(format_str): c = format_str[ind] if c != '{' and not within_field: prev_char = c new_str.append(c) ind += 1 continue while c == '{' and ind < len(format_str): new_str.append(c) # literal formatting { are not allowed # if prev_char == '{': # prev_char = '' # within_field = False # else: prev_char = c within_field = True ind += 1 c = format_str[ind] if within_field: new_arg = [] while ind < len(format_str): if c in '}!:': prev_char = c within_field = False new_args.append(''.join(new_arg)) break new_arg.append(c) ind += 1 c = format_str[ind] format_str = ''.join(new_str) args = new_args def _format_context(context): new_args = [] for arg in args: # LenaKeyError may be raised new_args.append(lena.context.get_recursively(context, arg)) # other exceptions, like ValueError # (for bad string formatting) may be raised. s = format_str.format(*new_args) return s return _format_context
_sentinel = object()
[docs]def get_recursively(d, keys, default=_sentinel): """Get value from a dictionary *d* recursively. *keys* can be a list of simple keys (strings), a dot-separated string or a dictionary with at most one key at each level. A string is split by dots and used as a list. A list of keys is searched in the dictionary recursively (it represents nested dictionaries). If any of them is not found, *default* is returned if "default" is given, otherwise :exc:`.LenaKeyError` is raised. If *keys* is empty, *d* is returned. Examples: >>> context = {"output": {"latex": {"name": "x"}}} >>> get_recursively(context, ["output", "latex", "name"], default="y") 'x' >>> get_recursively(context, "output.latex.name") 'x' .. note:: Python's dict.get in case of a missing value returns ``None`` and never raises an error. We implement it differently, because it allows more flexibility. If *d* is not a dictionary or if *keys* is not a string, a dict or a list, :exc:`.LenaTypeError` is raised. If *keys* is a dictionary with more than one key at some level, :exc:`.LenaValueError` is raised. """ has_default = default is not _sentinel if not isinstance(d, dict): raise lena.core.LenaTypeError( "need a dictionary, {} provided".format(d) ) if isinstance(keys, str): # here empty substrings are skipped, but this is undefined. keys = [key for key in keys.split('.') if key] # todo: create dict_to_list and disable dict keys here? elif isinstance(keys, dict): new_keys = [] while keys: if isinstance(keys, dict) and len(keys) != 1: raise lena.core.LenaValueError( "keys must have exactly one key at each level, " "{} given".format(keys) ) else: if not isinstance(keys, dict): new_keys.append(keys) break for key in keys: new_keys.append(key) keys = keys[key] break keys = new_keys elif isinstance(keys, list): if not all(isinstance(k, str) for k in keys): raise lena.core.LenaTypeError( "all simple keys must be strings, " "{} given".format(keys) ) else: raise lena.core.LenaTypeError( "keys must be a dict, a string or a list of keys, " "{} given".format(keys) ) for key in keys[:-1]: if key in d and isinstance(d.get(key), dict): d = d[key] elif has_default: return default else: raise lena.core.LenaKeyError( "nested dict {} not found in {}".format(key, d) ) if not keys: return d if keys[-1] in d: return d[keys[-1]] elif has_default: return default else: raise lena.core.LenaKeyError( "nested key {} not found in {}".format(keys[-1], d) )
[docs]def intersection(*dicts, **kwargs): """Return a dictionary, such that each of its items are contained in all *dicts* (recursively). *dicts* are several dictionaries. If *dicts* is empty, an empty dictionary is returned. A keyword argument *level* sets maximum number of recursions. For example, if *level* is 0, all *dicts* must be equal (otherwise an empty dict is returned). If *level* is 1, the result contains those subdictionaries which are equal. For arbitrarily nested subdictionaries set *level* to -1 (default). Example: >>> from lena.context import intersection >>> d1 = {1: "1", 2: {3: "3", 4: "4"}} >>> d2 = {2: {4: "4"}} >>> # by default level is -1, which means infinite recursion >>> intersection(d1, d2) == d2 True >>> intersection(d1, d2, level=0) {} >>> intersection(d1, d2, level=1) {} >>> intersection(d1, d2, level=2) {2: {4: '4'}} This function always returns a dictionary or its subtype (copied from dicts[0]). All values are deeply copied. No dictionary or subdictionary is changed. If any of *dicts* is not a dictionary or if some *kwargs* are unknown, :exc:`.LenaTypeError` is raised. """ if not all([isinstance(d, dict) for d in dicts]): raise lena.core.LenaTypeError( "all dicts must be dictionaries, " "{} given".format(dicts) ) level = kwargs.pop("level", -1) if kwargs: raise lena.core.LenaTypeError( "unknown kwargs {}".format(kwargs) ) if not dicts: return {} res = copy.deepcopy(dicts[0]) for d in dicts[1:]: if level == 0: if d == res and d: continue else: return {} to_delete = [] for key in res: if key in d: if d[key] != res[key]: if level == 1: to_delete.append(key) elif isinstance(res[key], dict) and isinstance(d[key], dict): res[key] = intersection(res[key], d[key], level=level-1) else: to_delete.append(key) else: # keys can't be deleted during iteration to_delete.append(key) for key in to_delete: del res[key] if not res: # res was calculated empty return res return res
def iterate_update(d, updates): """Iterate on updates of *d* with *updates*. *d* is a dictionary. It remains unchanged. *updates* is a list of dictionaries. For each element *update* a copy of *d* updated with *update* is yielded. If *updates* is empty, nothing is yielded. """ # todo: do I need this function? for update in updates: d_copy = copy.deepcopy(d) update_recursively(d_copy, update) yield d_copy def make_context(obj, *attrs): """Return context for object *obj*. *attrs* is a list of attributes of *obj* to be inserted into the context. If an attribute starts with an underscore '_', it is inserted without the underscore. If an attribute is absent or None, it is skipped. """ # todo: rename to to_dict # not used anywhere, change it freely. # add examples. context = {} for attr in attrs: val = getattr(obj, attr, None) if val is not None: if attr.startswith("_"): attr = attr[1:] context.update({attr: val}) return context
[docs]def str_to_dict(s, value=_sentinel): """Create a dictionary from a dot-separated string *s*. If the *value* is provided, it becomes the value of the deepest key represented by *s*. Dots represent nested dictionaries. If *s* is non-empty and *value* is not provided, then *s* must have at least two dot-separated parts (*"a.b"*), otherwise :exc:`.LenaValueError` is raised. If a *value* is provided, *s* must be non-empty. If *s* is empty, an empty dictionary is returned. Examples: >>> str_to_dict("a.b.c d") {'a': {'b': 'c d'}} >>> str_to_dict("output.changed", True) {'output': {'changed': True}} """ if s == "": if value is _sentinel: return {} else: raise lena.core.LenaValueError( "to make a dict with a value, " "provide at least one dot-separated key" ) # """*s* can be a dictionary. In this case it is returned as it is. # If s were a dictionary, value mustn't had been allowed. # probably this is a bad design, # elif isinstance(s, dict): # return s parts = s.split(".") if value is not _sentinel: parts.append(value) def nest_list(d, l): """Convert list *l* to nested dictionaries in *d*.""" len_l = len(l) if len_l == 2: d.update([(l[0], l[1])]) elif len_l < 2: raise lena.core.LenaValueError( "to make a dict, provide at least two dot-separated values" ) else: d.update([(l[0], nest_list({}, l[1:]))]) return d d = nest_list({}, parts) return d
[docs]def str_to_list(s): """Like :func:`str_to_dict`, but return a flat list. If the string *s* is empty, an empty list is returned. This is different from *str.split*: the latter would return a list with one empty string. Contrarily to :func:`str_to_dict`, this function allows an arbitrary number of dots in *s* (or none). """ if s == "": return [] # s can't be a list. This function is not used as a general # interface (as str_to_dict could be). # s may contain empty substrings, like in "a..b" # this is not encouraged, of course, but may suit: # if there are two errors in some user's context logic, # they may compensate and not destroy all. # Another variant would be to treat empty strings # as whole context. The variant with '' seems more understandable # to the user. return s.split(".")
[docs]def update_nested(key, d, other): """Update *d[key]* with the *other* dictionary preserving data. If *d* doesn't contain the *key*, it is updated with *{key: other}*. If *d* contains the *key*, *d[key]* is inserted into *other[key]* (so that it is not overriden). If *other* contains *key* (and possibly more nested *key*-s), then *d[key]* is inserted into the deepest level of *other.key.key...* Finally, *d[key]* becomes *other*. Example: >>> context = {"variable": {"name": "x"}} >>> new_var_context = {"name": "n"} >>> update_nested("variable", context, copy.deepcopy(new_var_context)) >>> context == {'variable': {'name': 'n', 'variable': {'name': 'x'}}} True >>> >>> update_nested("variable", context, {"name": "top"}) >>> context == { ... 'variable': {'name': 'top', ... 'variable': {'name': 'n', 'variable': {'name': 'x'}}} ... } True *other* is modified in general. Create that on the fly or use *copy.deepcopy* when appropriate. Recursive dictionaries (containing references to themselves) are strongly discouraged and meaningless when nesting. If *other[key]* is recursive, :exc:`.LenaValueError` may be raised. """ # there was an idea to add a keyword argument copy_other # (by default True), but the user can do that him/herself # with copy.deepcopy when needed. Otherwise it would be # unnecessary complication of this interface. # Only one key is nested. This encourages design when # 1) elements combine their contexts into one key # (like {"split_into_bins": {"variable": {}, "histogram": {}}}) # 2) elements change only one key ("variable", "histogram",...). def get_most_nested_subdict_with(key, d): nested_dicts = [] while True: if key in d: if d in nested_dicts: raise lena.core.LenaValueError( "recursive *other* is forbidden" ) nested_dicts.append(d) d = d[key] else: return d if key in d: other_most_nested = get_most_nested_subdict_with(key, other) # insert d[key] at the lowest other.key.key.... other_most_nested[key] = d[key] d[key] = other
[docs]def update_recursively(d, other, value=_sentinel): """Update dictionary *d* with items from *other* dictionary. *other* can be a dot-separated string. In this case :func:`str_to_dict` is used to convert it and the *value* to a dictionary. A *value* argument is allowed only when *other* is a string, otherwise :exc:`.LenaValueError` is raised. Existing values are updated recursively, that is including nested subdictionaries. Example: >>> d1 = {"a": 1, "b": {"c": 3}} >>> d2 = {"b": {"d": 4}} >>> update_recursively(d1, d2) >>> d1 == {'a': 1, 'b': {'c': 3, 'd': 4}} True >>> # Usual update would have made d1["b"] = {"d": 4}, erasing "c". Non-dictionary items from *other* overwrite those in *d*: >>> update_recursively(d1, {"b": 2}) >>> d1 == {'a': 1, 'b': 2} True """ # skip this docstring, because it's trivial. # Both *d* and *other* must be dictionaries, # otherwise :exc:`.LenaTypeError` is raised. # it would be cleaner to allow only dict as other, # but it's very clear and useful to allow # lena.context.update_recursively(context, "output.changed", True) if isinstance(other, str): other = str_to_dict(other, value) else: if value is not _sentinel: raise lena.core.LenaValueError( "explicit value is allowed only when other is a string" ) if not isinstance(d, dict) or not isinstance(other, dict): raise lena.core.LenaTypeError( "d and other must be dicts, {} and {} provided".format(d, other) ) for key, val in other.items(): if not isinstance(val, dict): d[key] = val else: if key in d: if not isinstance(d[key], dict): d[key] = {} update_recursively(d[key], other[key]) else: d[key] = val