Исходный код lena.output.write

"""Write data to filesystem."""
import os
import sys
import warnings

import lena.context
import lena.core
import lena.flow


def Writer(*args, **kwargs):
    """Deprecated alias that forwards all arguments to :class:`Write`.

    .. deprecated:: 0.4
       use :class:`Write`.
    """
    message = "Writer is deprecated since Lena 0.4. Use Write. In:"
    # stacklevel=2 makes the warning point at the caller of Writer,
    # not at this wrapper.
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return Write(*args, **kwargs)
class Write(object):
    """Write text data to filesystem."""

    def __init__(self, output_directory, output_filename="output",
                 verbose=True, existing_unchanged=False, overwrite=False):
        """*output_directory* is the base output directory.
        It can be further appended by the incoming data.
        Non-existing directories are created.

        *output_filename* is the name for unnamed data.
        Use it to write only one file.
        With the default *output_filename*, unnamed data is written
        to "output.txt" in *output_directory*
        (rewritten for every new value),
        unless a different extension is provided through the context.
        It is recommended to create filename explicitly
        using :class:`.MakeFilename`.
        The default writer's output file is useful in case of errors,
        when explicit file name didn't work.

        *verbose* sets whether additional information
        should be printed on the screen.
        *verbose* set to ``False`` disables runtime messages.

        *existing_unchanged* and *overwrite* are used during :meth:`run`
        to change the handling of existing files.
        These options are mutually exclusive:
        their simultaneous use raises :exc:`.LenaValueError`.

        If *output_directory* or *output_filename* is not a string,
        :exc:`.LenaTypeError` is raised.
        """
        if (not isinstance(output_directory, str)
                or not isinstance(output_filename, str)):
            raise lena.core.LenaTypeError(
                "output_directory and output_filename must be strings, "
                "{} and {} given".format(output_directory, output_filename)
            )
        self._output_filename = output_filename
        # the original (possibly templated) directory is kept,
        # because output_directory can be re-formatted in _set_context
        self._orig_outdir = output_directory
        self.output_directory = output_directory
        if '{' in output_directory:
            self._format_context = lena.context.format_context(
                output_directory
            )

        # verbose is boolean, because for more detailed information
        # one can use a Print element.
        self._verbose = verbose

        if existing_unchanged and overwrite:
            raise lena.core.LenaValueError(
                "existing_unchanged and overwrite are mutually exclusive"
            )
        self._existing_unchanged = existing_unchanged
        self._overwrite = overwrite

    @staticmethod
    def _normalize_path(path_name, path):
        """Return *path* made relative, warning if it was absolute.

        *path_name* is used only in the warning message.
        A drive or UNC prefix and all leading path separators
        are removed, so that the result can be safely joined
        to the output directory.
        """
        if os.path.isabs(path):
            warnings.warn(
                "{} should not be an absolute path, {} given"
                .format(path_name, path),
                RuntimeWarning
            )
            # empty on POSIX, "C:" or a UNC share on Windows
            path = os.path.splitdrive(path)[1]
        # strip every leading separator (not only the first one):
        # otherwise os.path.join would discard the output directory
        separators = os.sep + (os.altsep or "")
        return path.lstrip(separators)

    @staticmethod
    def _ensure_directory(dirpath):
        """Create directory *dirpath* (with parents) if it is missing.

        An empty *dirpath* means the current directory
        and nothing is created.
        """
        if not dirpath or os.path.isdir(dirpath):
            return
        try:
            os.makedirs(dirpath)
        except OSError:
            # the directory could have been created by someone else
            # after the check above; this is not an error
            if not os.path.isdir(dirpath):
                raise

    def _make_filename(self, outputc):
        """Return a tuple *(dirname, filename, fileext, filepath)*
        built from the dictionary *outputc* (*context.output*).

        Missing *filename* is replaced with the default one,
        missing *fileext* with *filetype* or with "txt".
        An empty *filename* raises :exc:`.LenaRuntimeError`.
        """
        # dirname is always relative to self.output_directory
        dirname = outputc.get("dirname", "")

        # get file extension
        if "filetype" in outputc and "fileext" not in outputc:
            fileext = outputc["filetype"]
        else:
            fileext = outputc.get("fileext", "txt")

        # get file name
        if "filename" in outputc:
            filename = outputc["filename"]
            if not filename:
                raise lena.core.LenaRuntimeError(
                    "empty filename in context.output"
                )
        else:
            filename = self._output_filename

        # an empty extension produces a file name without a dot
        if fileext:
            filepath = filename + "." + fileext
        else:
            filepath = filename

        dirname = self._normalize_path("dirname", dirname)
        filepath = self._normalize_path("filename", filepath)
        filepath = os.path.join(self.output_directory, dirname, filepath)
        return (dirname, filename, fileext, filepath)

    def run(self, flow):
        """Only strings (and `unicode` in Python 2) and objects
        with a method *write* are written.
        Method *write* must accept a string with output file path
        as an argument.
        If *context["output"]["write"]* is set to ``False``,
        a value will not be written.
        Not written values pass unchanged.

        Full name of the file to be written (*filepath*) has the form
        *self.output_directory/dirname/filename.fileext*,
        where *dirname*, *filename* and file extension *fileext*
        are searched in *context["output"]*.
        If *filename* is missing, *Write*'s default filename is used.
        If *fileext* is missing, then *filetype* is used;
        if it is also absent, the default file extension is "txt".
        It is usually enough to provide *fileext*.

        If the resulting file exists and its content is the same
        as the incoming data, file is not overwritten
        (unless it was produced with an object's method *write*,
        which doesn't allow to learn whether the file has changed).
        If *existing_unchanged* is ``True``, existing file contents
        are not checked (they are assumed to be not changed).
        If *overwrite* is ``True``, file contents are not checked,
        and all data is assumed to be changed.

        If a file was written (including a newly created one),
        then *output.changed* is set to ``True``,
        otherwise, if it was not set before, it is set to ``False``.
        If in that case *output.changed* existed,
        it retains its previous value.

        Example: suppose you have a sequence
        *(Histogram, ToCSV, Write, RenderLaTeX, Write, LaTeXToPDF)*.
        If both histogram representation and LaTeX template exist
        and are unchanged, the second *Write* signals
        *context.output.changed=False*,
        and LaTeXToPDF doesn't regenerate the plot.
        If LaTeX template was unchanged, but the previous context
        from the first *Write* signals *context.output.changed=True*,
        then in the second *Write* template is not rewritten,
        but *context.output.changed* remains ``True``.
        On the second run, even if we check file contents,
        the program will run faster for unchanged files
        even for :class:`Write`, because read speed
        is typically higher than write speed.

        File name with full path is yielded as data.
        *context.output* is updated with *fileext* and *filename*
        (in case they were not present), and *filepath*,
        where *filename* is its base part
        (without output directory and extension)
        and *filepath* is the complete path.

        If data is equal to *context.output.filepath*,
        this means that the file was already written
        by another *Write*, and the value is skipped
        (yielded unchanged).

        If *context.output.filename* is present but empty,
        :exc:`.LenaRuntimeError` is raised.
        """
        def has_write_method(data):
            return hasattr(data, "write") and callable(data.write)

        def is_writable(data, context):
            # context doesn't forbid writing
            if lena.context.get_recursively(context, "output.write", True)\
                    is False:
                return False
            # data allows writing
            if has_write_method(data):
                return True
            # check strings
            if isinstance(data, str):
                return True
            if sys.version_info.major == 3:
                return False
            # Python 2: unicode strings are also written
            return isinstance(data, basestring)  # noqa: F821

        for val in flow:
            data, context = lena.flow.get_data_context(val)
            if not is_writable(data, context):
                yield val
                continue

            # write output
            if "output" not in context:
                context["output"] = {}
            outputc = context["output"]

            try:
                dirname, filename, fileext, filepath = \
                    self._make_filename(outputc)
            except lena.core.LenaRuntimeError:
                raise lena.core.LenaRuntimeError(
                    "could not make output file name from {}".format(val)
                )

            # this file path was already written by another Write,
            # hence it is skipped.
            # Another option would be to create a special class
            # for file paths to be used as a data part of a value.
            # The used variant is less general, but practical.
            if data == filepath:
                yield val
                continue

            # dirname is not changed, no need to update it
            outputc["filename"] = filename
            outputc["fileext"] = fileext
            outputc["filepath"] = filepath

            # if nothing explicitly stated changes, data is unchanged
            changed = outputc.get("changed", False)

            if has_write_method(data):
                # todo: allow to check for method has_changed
                # - allow to use write options
                #   from context.output.write_args
                #   and context.output.write_kwargs
                # - allow existing_unchanged.
                data.write(filepath)
                outputc["changed"] = True
                yield (filepath, context)
                continue

            if not os.path.exists(filepath):
                # a new file: create containing directory and write
                self._ensure_directory(os.path.dirname(filepath))
                self._write_data(filepath, data)
                # a file was written, which must be signalled
                outputc["changed"] = True
                yield (filepath, context)
                continue

            if self._existing_unchanged:
                outputc["changed"] = changed
                if self._verbose:
                    print("# file unchanged, Write skips {}"
                          .format(filepath))
                yield (filepath, context)
                continue

            if self._overwrite:
                self._write_data(filepath, data)
                outputc["changed"] = True
                yield (filepath, context)
                continue

            # write if existing data differs with written
            with open(filepath) as fil:
                # read has an optional argument
                # size = maximum size (bytes) to read;
                # negative or omitted means whole content
                size = -1
                # security warning: say an adversary creates
                # a huge file and adds its path to context (!).
                # Then the program will crash
                # trying to read that in memory.
                existing_data = fil.read(size)

            if data != existing_data:
                self._write_data(filepath, data)
                outputc["changed"] = True
            else:
                if self._verbose:
                    print("# file unchanged, Write skips {}"
                          .format(filepath))
                # False, unless explicitly set to True
                outputc["changed"] = changed
            yield (filepath, context)

    def _set_context(self, context):
        """Format the output directory with the static *context*.

        Nothing is done if the initial output directory
        contains no formatting braces.
        If *context* lacks the keys needed for formatting,
        the output directory is left unchanged.
        """
        # we add this method to all Write objects for uniformity.
        # Advised by Aaron Hall,
        # https://stackoverflow.com/a/28060251/952234
        if '{' not in self._orig_outdir:
            return
        # set static context to format the output directory name
        try:
            outdir = self._format_context(context)
        except lena.core.LenaKeyError:
            # deliberate: keep the current directory
            # if the context can't format the template
            pass
        else:
            self.output_directory = outdir

    def _write_data(self, filepath, data):
        """Write string *data* to the file *filepath* (text mode)."""
        # todo: allow binary files
        # We write only strings, so no TypeError is expected.
        # If one occurs, the user will note that.
        with open(filepath, "w") as fil:
            fil.write(data)