# Source code for lena.structures.graph

"""A graph is a function at given coordinates."""
import copy
import functools
import operator
import re
import warnings

import lena.core
import lena.context
import lena.flow


class graph():
    """Numeric arrays of equal size."""

    def __init__(self, coords, field_names=("x", "y"), scale=None):
        """This structure generally corresponds to the graph of a function
        and represents arrays of coordinates
        and the function values of arbitrary dimensions.

        *coords* is a list of one-dimensional
        coordinate and value sequences (usually lists).
        There is little to no distinction between them,
        and "values" can also be called "coordinates".

        *field_names* provide the meaning of these arrays.
        For example, a 3-dimensional graph could be distinguished
        from a 2-dimensional graph with errors by its fields
        ("x", "y", "z") versus ("x", "y", "error_y").
        Field names don't affect drawing graphs:
        for that :class:`~Variable`-s should be used.
        Default field names, provided for the most used 2-dimensional
        graphs, are "x" and "y".

        *field_names* can be a string separated by whitespace
        and/or commas or a tuple of strings, such as ("x", "y").
        *field_names* must have as many elements as *coords*
        and each field name must be unique.
        Otherwise field names are arbitrary.
        Error fields must go after all other coordinates.
        Name of a coordinate error is "error\\_" appended
        by coordinate name. Further error details are appended
        after '_'. They could be arbitrary depending on the problem:
        "low", "high", "low_90%_cl", etc.
        Example: ("E", "time", "error_E_low", "error_time").

        *scale* of the graph is a kind of its norm. It could be
        the integral of the function or its other property.
        A scale of a normalised probability density function
        would be one. An initialized *scale* is required
        if one needs to renormalise the graph in :meth:`scale`
        (for example, to plot it with other graphs).

        Coordinates of a function graph would usually be arrays
        of increasing values, which is not required here.
        Neither is it checked that coordinates
        indeed contain one-dimensional numeric values.
        However, non-standard graphs
        will likely lead to errors during plotting
        and will require more programmer's work and caution,
        so use them only if you understand what you are doing.

        A graph can be iterated yielding tuples of numbers
        for each point.

        **Attributes**

        :attr:`coords` is a list of one-dimensional lists
        of coordinates.

        :attr:`field_names`

        :attr:`dim` is the dimension of the graph,
        that is of all its coordinates without errors.

        In case of incorrect initialization arguments,
        :exc:`~.LenaTypeError` or :exc:`~.LenaValueError` is raised.

        .. versionadded:: 0.5
        """
        if not coords:
            raise lena.core.LenaValueError(
                "coords must be a non-empty sequence "
                "of coordinate sequences"
            )
        # require all coordinate sequences to be of the same size
        pt_len = len(coords[0])
        for arr in coords[1:]:
            if len(arr) != pt_len:
                raise lena.core.LenaValueError(
                    "coords must have subsequences of equal lengths"
                )

        # Unicode (Python 2) field names would be just bad,
        # so we don't check for that here.
        if isinstance(field_names, str):
            # split(', ') won't work.
            # From https://stackoverflow.com/a/44785447/952234:
            # \s stands for whitespace.
            field_names = tuple(re.findall(r'[^,\s]+', field_names))
        elif not isinstance(field_names, tuple):
            # A tuple (not a list) is important
            # for comparisons and uniformity (as in namedtuple).
            raise lena.core.LenaTypeError(
                "field_names must be a string or a tuple"
            )

        if len(field_names) != len(coords):
            # fixed: the message previously repeated "must have" twice
            raise lena.core.LenaValueError(
                "field_names must have the same size as coords"
            )
        if len(set(field_names)) != len(field_names):
            raise lena.core.LenaValueError(
                "field_names contains duplicates"
            )

        self.coords = coords
        self._scale = scale
        # field_names are better than fields,
        # because they are unambiguous (as in namedtuple)
        self.field_names = field_names

        # Error naming follows ROOT: "error_x_low".
        # May raise LenaValueError for malformed error names;
        # the former try/except that re-raised the same exception
        # was removed as a no-op.
        self._parsed_error_names = self._parse_error_names(field_names)

        # dimension counts only true coordinates, not errors
        dim = len(field_names) - len(self._parsed_error_names)
        self._coord_names = field_names[:dim]
        self.dim = dim

        # todo: add subsequences of coords as attributes
        # with field names
        # (should work when we rescale the graph
        # and not interfere with other fields and methods).

    def __eq__(self, other):
        """Two graphs are equal, if and only if they have
        equal coordinates, field names and scales.

        If *other* is not a :class:`.graph`, return ``False``.

        Note that floating numbers should be compared
        approximately (using :func:`math.isclose`).
        Therefore this comparison may give false negatives.
        """
        if not isinstance(other, graph):
            # in Python comparison between different types is allowed
            return False
        return (self.coords == other.coords
                and self._scale == other._scale
                and self.field_names == other.field_names)

    def _get_err_indices(self, coord_name):
        """Get indices of error fields corresponding to *coord_name*."""
        err_indices = []
        dim = self.dim
        # parsed error tuples are ("error", coord, tail, index);
        # error fields are stored after all coordinates, hence ind+dim
        for ind, err in enumerate(self._parsed_error_names):
            if err[1] == coord_name:
                err_indices.append(ind + dim)
        return err_indices

    def __iter__(self):
        """Iterate graph points (tuples of numbers) one by one."""
        for val in zip(*self.coords):
            yield val

    def __repr__(self):
        return """graph({}, field_names={}, scale={})""".format(
            self.coords, self.field_names, self._scale
        )

    def scale(self, other=None):
        """Get or set the scale of the graph.

        If *other* is ``None``, return the scale of this graph.

        If a numeric *other* is provided, rescale to that value.
        If the graph has unknown or zero scale,
        rescaling that will raise :exc:`~.LenaValueError`.

        To get meaningful results, graph's fields are used.
        Only the last coordinate is rescaled.
        For example, if the graph has *x* and *y* coordinates,
        then *y* will be rescaled, and for a 3-dimensional graph
        *z* will be rescaled.
        All errors are rescaled together with their coordinate.
        """
        # called scale() for uniformity with histograms;
        # the graph is modified in place, because creating
        # a new graph just to change the scale would be redundant
        if other is None:
            return self._scale

        if not self._scale:
            raise lena.core.LenaValueError(
                "can't rescale a graph with zero or unknown scale"
            )

        last_coord_ind = self.dim - 1
        last_coord_name = self.field_names[last_coord_ind]
        # the value and all its error fields are rescaled together
        last_coord_indices = ([last_coord_ind]
                              + self._get_err_indices(last_coord_name))

        # In Python 2 3/2 is 1, so we want to be safe;
        # the downside is that integer-valued graphs will become
        # floating, but that is doubtfully an issue.
        rescale = float(other) / self._scale

        mul = operator.mul
        partial = functools.partial
        # map(partial(mul, rescale), ...) is measurably faster
        # than the equivalent lambda (see timeit comparisons
        # in the project history)

        # rescale arrays of values and errors
        for ind, arr in enumerate(self.coords):
            if ind in last_coord_indices:
                # Python lists are faster than arrays,
                # https://stackoverflow.com/a/62399645/952234
                # (because each time taking a value from an array
                # creates a Python object)
                self.coords[ind] = list(map(partial(mul, rescale), arr))

        self._scale = other
        # as suggested in PEP 8 for in-place modification
        return None

    def _parse_error_names(self, field_names):
        """Parse error fields into ("error", coord, tail, index) tuples.

        Raise :exc:`~.LenaValueError` if error fields are malformed
        or misplaced.
        """
        # field_names is a parameter for easier testing,
        # usually object's field_names are used
        errors = []
        # collect all error fields and check that they are
        # strictly after other fields
        in_error_fields = False
        # there is at least one field
        last_coord_ind = 0
        for ind, field in enumerate(field_names):
            if field.startswith("error_"):
                in_error_fields = True
                errors.append((field, ind))
            else:
                last_coord_ind = ind
                if in_error_fields:
                    raise lena.core.LenaValueError(
                        "errors must go after coordinate fields"
                    )

        coords = set(field_names[:last_coord_ind+1])
        parsed_errors = []
        for err, ind in errors:
            err_coords = []
            for coord in coords:
                err_main = err[6:]  # all after "error_"
                if err_main == coord or err_main.startswith(coord + "_"):
                    err_coords.append(coord)
                    err_tail = err_main[len(coord)+1:]
            if not err_coords:
                raise lena.core.LenaValueError(
                    "no coordinate corresponding to {} given".format(err)
                )
            elif len(err_coords) > 1:
                raise lena.core.LenaValueError(
                    "ambiguous error " + err +
                    " corresponding to several coordinates given"
                )
            # "error" may be redundant, but it is explicit
            parsed_errors.append(("error", err_coords[0], err_tail, ind))

        return parsed_errors

    def _update_context(self, context):
        """Update *context* with the properties of this graph.

        *context.error* is appended with indices of errors.
        Example subcontext for a graph with fields "E,t,error_E_low":
        {"error": {"x_low": {"index": 2}}}.
        Note that error names are called "x", "y" and "z"
        (this corresponds to first three coordinates,
        if they are present), which allows to simplify plotting.

        Existing values are not removed
        from *context.value* and its subcontexts.

        Called on "destruction" of the graph (for example,
        in :class:`.ToCSV`). By destruction we mean conversion
        to another structure (like text) in the flow.
        The graph object is not really destroyed in this process.
        """
        # this method is private, because we encourage users to yield
        # graphs into the flow and process them with the ToCSV element
        # (not manually)
        if not self._parsed_error_names:
            # no error fields present
            return

        xyz_coord_names = self._coord_names[:3]
        for name, coord_name in zip(["x", "y", "z"], xyz_coord_names):
            for err in self._parsed_error_names:
                if err[1] == coord_name:
                    error_ind = err[3]
                    if err[2]:
                        # add error suffix
                        error_name = name + "_" + err[2]
                    else:
                        error_name = name
                    lena.context.update_recursively(
                        context, "error.{}.index".format(error_name),
                        # error can correspond both to variable and
                        # value, so we put it outside value
                        error_ind
                    )

    # emulating numeric types
    def __add__(self, other):
        """Add last (highest) coordinates of two graphs.

        A new graph is returned. Error fields are ignored.

        NOTE(review): graphs that actually contain error fields can't
        be added: the sum keeps all field names but only *dim*
        coordinate arrays, so graph.__init__ raises LenaValueError.
        """
        # todo: make it method add(.., calculate_error=...)
        if not isinstance(other, graph):
            return NotImplemented
        # their errors may be different, only dimensions must agree
        assert self.dim == other.dim
        dim = self.dim
        last_coord_ind = dim - 1

        # lower coordinates are assumed to coincide;
        # only their lengths are checked here
        all_same = all(len(self.coords[i]) == len(other.coords[i])
                       for i in range(dim - 1))
        assert all_same

        new_coords = [copy.copy(self.coords[i]) for i in range(dim - 1)]
        new_vals = [
            self.coords[last_coord_ind][i] + other.coords[last_coord_ind][i]
            for i in range(len(self.coords[last_coord_ind]))
        ]
        new_coords.append(new_vals)

        try:
            scale0 = self.scale()
            scale1 = other.scale()
        except lena.core.LenaValueError:
            scale = None
        else:
            if scale0 is not None and scale1 is not None:
                scale = scale0 + scale1
            else:
                scale = None

        return graph(coords=new_coords, field_names=self.field_names,
                     scale=scale)
# used in the deprecated Graph class below
def _rescale_value(rescale, value):
    """Multiply the data part of *value* by *rescale*."""
    data = lena.flow.get_data(value)
    return rescale * data
class Graph(object):
    """
    .. deprecated:: 0.5
       use :class:`graph`. This class may be used in the future,
       but with a changed interface.

    Function at given coordinates (arbitraty dimensions).

    Graph points can be set during the initialization
    and during :meth:`fill`. It can be rescaled
    (producing a new :class:`Graph`).
    A point is a tuple of *(coordinate, value)*,
    where both *coordinate* and *value* can be tuples of numbers.
    *Coordinate* corresponds to a point in N-dimensional space,
    while *value* is some function's value at this point
    (the function can take a value in M-dimensional space).
    Coordinate and value dimensions must be the same for all points.

    One can get graph points as :attr:`Graph.points` attribute.
    They will be sorted each time before return
    if *sort* was set to ``True``.
    An attempt to change points
    (use :attr:`Graph.points` on the left of '=')
    will raise Python's :exc:`AttributeError`.
    """

    def __init__(self, points=None, context=None, scale=None, sort=True):
        """*points* is an array of *(coordinate, value)* tuples.

        *context* is the same as the most recent context
        during *fill*. Use it to provide a context
        when initializing a :class:`Graph` from existing points.

        *scale* sets the scale of the graph.
        It is used during plotting if rescaling is needed.

        Graph coordinates are sorted by default.
        This is usually needed to plot graphs of functions.
        If you need to keep the order of insertion,
        set *sort* to ``False``.

        By default, sorting is done using standard Python
        lists and functions. You can disable *sort*
        and provide your own sorting container for *points*.
        Some implementations are compared
        `here
        <http://www.grantjenks.com/docs/sortedcontainers/performance.html>`_.
        Note that a rescaled graph uses a default list.

        Note that :class:`Graph` does not reduce data.
        All filled values will be stored in it.
        To reduce data, use histograms.
        """
        warnings.warn("Graph is deprecated since Lena 0.5. Use graph.",
                      DeprecationWarning, stacklevel=2)
        self._points = points if points is not None else []
        # todo: add some sanity checks for points
        self._scale = scale
        # the initialization scale is kept in the context
        # and merged with the flow context in _update
        self._init_context = {"scale": scale}
        if context is None:
            self._cur_context = {}
        elif not isinstance(context, dict):
            raise lena.core.LenaTypeError(
                "context must be a dict, {} provided".format(context)
            )
        else:
            self._cur_context = context
        self._sort = sort
        # todo: probably, scale from context is not needed.

        ## probably this function is not needed.
        ## it can't be copied, graphs won't be possible to compare.
        # *rescale_value* is a function, which can be used to scale
        # complex graph values.
        # It must accept a rescale parameter and the value
        # at a data point.
        # By default, it is multiplication of rescale and the value
        # (which must be a number).
        # if rescale_value is None:
        #     self._rescale_value = _rescale_value
        self._rescale_value = _rescale_value
        self._update()

    def fill(self, value):
        """Fill the graph with *value*.

        *Value* can be a *(data, context)* tuple.
        *Data* part must be a *(coordinates, value)* pair,
        where both coordinates and value are also tuples.
        For example, *value* can contain the principal number
        and its precision.
        """
        # the most recent context is remembered for request/_update
        point, self._cur_context = lena.flow.get_data_context(value)
        # coords, val = point
        self._points.append(point)

    def request(self):
        """Yield graph with context.

        If *sort* was initialized ``True``,
        graph points will be sorted.
        """
        # If flow contained *scale* in the context, it is set now.
        self._update()
        yield (self, self._context)

    # compute method shouldn't be in this class,
    # because it is a pure FillRequest.
    # def compute(self):
    #     """Yield graph with context (as in :meth:`request`),
    #     and :meth:`reset`."""
    #     self._update()
    #     yield (self, self._context)
    #     self.reset()

    @property
    def points(self):
        """Get graph points (read only)."""
        # sort points before giving them
        self._update()
        return self._points

    def reset(self):
        """Reset points to an empty list
        and current context to an empty dict.
        """
        self._points = []
        self._cur_context = {}

    def __repr__(self):
        # sorting affects the printed representation,
        # so points are updated first
        self._update()
        return ("Graph(points={}, scale={}, sort={})"
                .format(self._points, self._scale, self._sort))

    def scale(self, other=None):
        """Get or set the scale.

        Graph's scale comes from an external source.
        For example, if the graph was computed from a function,
        this may be its integral passed via context during :meth:`fill`.
        Once the scale is set, it is stored in the graph.
        If one attempts to use scale which was not set,
        :exc:`.LenaAttributeError` is raised.

        If *other* is None, return the scale.

        If a ``float`` *other* is provided, rescale to *other*.
        A new graph with the scale equal to *other* is returned,
        the original one remains unchanged.
        Note that in this case its *points* will be a simple list
        and new graph *sort* parameter will be ``True``.

        Graphs with scale equal to zero can't be rescaled.
        Attempts to do that raise :exc:`.LenaValueError`.
        """
        if other is None:
            # return scale
            self._update()
            if self._scale is None:
                raise lena.core.LenaAttributeError(
                    "scale must be explicitly set before using that"
                )
            return self._scale
        else:
            # rescale from other
            scale = self.scale()
            if scale == 0:
                raise lena.core.LenaValueError(
                    "can't rescale graph with 0 scale"
                )
            # new_init_context = copy.deepcopy(self._init_context)
            # new_init_context.update({"scale": other})
            rescale = float(other) / scale
            new_points = []
            for coord, val in self._points:
                # probably not needed, because tuples are immutable:
                # make a deep copy so that new values
                # are completely independent from old ones.
                new_points.append((coord, self._rescale_value(rescale, val)))
            # todo: should it inherit context?
            # Probably yes, but watch out scale.
            new_graph = Graph(points=new_points, scale=other,
                              sort=self._sort)
            return new_graph

    def to_csv(self, separator=",", header=None):
        """.. deprecated:: 0.5
           in Lena 0.5 to_csv is not used.
           Iterables are converted to tables.

        Convert graph's points to CSV.

        *separator* delimits values, the default is comma.

        *header*, if not ``None``, is the first string of the output
        (new line is added automatically).

        Since a graph can be multidimensional,
        for each point first its coordinate is converted to string
        (separated by *separator*), then each part of its value.

        To convert :class:`Graph` to CSV inside a Lena sequence,
        use :class:`lena.output.ToCSV`.
        """
        if self._sort:
            self._update()

        def unpack_pt(pt):
            # flatten a ((coord...), (value...)) pair into one list;
            # scalar coordinate or value is handled as well
            coord = pt[0]
            value = pt[1]
            if isinstance(coord, tuple):
                unpacked = list(coord)
            else:
                unpacked = [coord]
            if isinstance(value, tuple):
                unpacked += list(value)
            else:
                unpacked.append(value)
            return unpacked

        def pt_to_str(pt, separ):
            return separ.join([str(val) for val in unpack_pt(pt)])

        if header is not None:
            # if one needs an empty header line, they may provide ""
            lines = header + "\n"
        else:
            lines = ""
        lines += "\n".join([pt_to_str(pt, separator)
                            for pt in self.points])
        return lines

    # *context* will be added to graph context.
    # If it contains "scale", :meth:`scale` method will be available.
    # Otherwise, if "scale" is contained in the context
    # during :meth:`fill`, it will be used.
    # In this case it is assumed that this scale
    # is same for all values (only the last filled context is checked).
    # Context from flow takes precedence over the initialized one.
    def _update(self):
        """Sort points if needed, update context."""
        # todo: probably remove this context_scale?
        context_scale = self._cur_context.get("scale")
        if context_scale is not None:
            # this complex check is fine with rescale,
            # because that returns a new graph (this scale unchanged).
            if self._scale is not None and self._scale != context_scale:
                raise lena.core.LenaRuntimeError(
                    "Initialization and context scale differ, "
                    "{} and {} from context {}"
                    .format(self._scale, context_scale, self._cur_context)
                )
            self._scale = context_scale
        if self._sort:
            self._points = sorted(self._points)
        self._context = copy.deepcopy(self._cur_context)
        self._context.update(self._init_context)
        # why this? Not *graph.scale*?
        self._context.update({"scale": self._scale})

        # todo: make this check during fill.
        # Probably initialize self._dim with kwarg dim
        # (dim of coordinates or values?)
        if self._points:
            # check points correctness
            points = self._points

            def coord_dim(coord):
                # scalar coordinates have dimension 1
                if not hasattr(coord, "__len__"):
                    return 1
                return len(coord)

            first_coord = points[0][0]
            dim = coord_dim(first_coord)
            same_dim = all(coord_dim(point[0]) == dim
                           for point in points)
            if not same_dim:
                raise lena.core.LenaValueError(
                    "coordinates tuples must have same dimension, "
                    "{} given".format(points)
                )
            self.dim = dim
            self._context["dim"] = self.dim

    def __eq__(self, other):
        # graphs are equal if their points are equal
        # and their scales are both unset or equal
        if not isinstance(other, Graph):
            return False
        if self.points != other.points:
            return False
        if self._scale is None and other._scale is None:
            return True
        try:
            result = self.scale() == other.scale()
        except lena.core.LenaAttributeError:
            # one scale couldn't be computed
            return False
        else:
            return result