Source code for lena.flow.group_by

"""Group data using :class:`.GroupBy` class."""
import lena.core
import lena.flow


class GroupBy(object):
    """Group values.

    Data is added during :meth:`update`.
    Groups dictionary is available as :attr:`groups` attribute.
    :attr:`groups` is a mapping of *keys* (defined by *group_by*)
    to lists of items with the same key.
    """

    def __init__(self, group_by):
        """*group_by* is a function that returns
        distinct hashable results for values from different groups.
        It can be also a dot-separated formatting string.
        In that case only the context part of the value is used
        (see :func:`context.format_context <.format_context>`).

        If *group_by* is not a callable or a string,
        :exc:`.LenaTypeError` is raised.
        """
        self.groups = dict()
        if callable(group_by):
            # A callable receives the whole value (not only its context);
            # this is allowed for generality.
            # In practice group_by is used with context. The only example
            # where it could probe the data part is histograms
            # with same variables but with different ranges
            # (one wouldn't be able to plot graphs with them
            # without changing context though).
            # This is a weak example, because this information
            # could be added to context.
            self._group_by = group_by
        elif isinstance(group_by, str):
            # Import explicitly: this module imports only lena.core
            # and lena.flow, so lena.context would otherwise be available
            # only if some other module had already loaded it.
            # The "from" form is used on purpose: a plain
            # "import lena.context" here would make "lena" a local name
            # and break the lena.core lookup in the branch below.
            from lena.context import format_context

            fc = format_context(group_by)
            # the key is the formatted context of the value
            self._group_by = lambda val: fc(lena.flow.get_context(val))
        else:
            raise lena.core.LenaTypeError(
                "group_by must be a callable or a string, "
                "{} provided".format(group_by)
            )

    def update(self, val):
        """Find a group for *val* and add it there.

        A group key is calculated by *group_by*.
        If no such key exists, a new group is created.

        If a formatting key was not found for *val*,
        :exc:`.LenaValueError` is raised.
        """
        try:
            key = self._group_by(val)
        except lena.core.LenaKeyError:
            raise lena.core.LenaValueError(
                "could not find a key for {}".format(val)
            )
        # create the group on first use, then append to it
        self.groups.setdefault(key, []).append(val)

    def clear(self):
        """Remove all groups."""
        self.groups.clear()