"""Convert LaTeX to PDF."""
from __future__ import print_function
import collections
import os
import subprocess
import lena.core
import lena.context
class LaTeXToPDF(object):
    """Run ``pdflatex`` binary for LaTeX files.

    It runs in parallel (separate process is spawned for each job)
    and non-interactively.
    """

    def __init__(self, overwrite=False, verbose=1, create_command=None):
        """*overwrite* sets whether existing unchanged pdfs
        shall be overwritten during :meth:`run`.

        *verbose = 0* allows no output messages.
        1 prints ``pdflatex`` command and output in case of errors.
        More than 1 prints all ``pdflatex`` output.

        If you need to run ``pdflatex`` (or other executable)
        with different parameters, provide its command.
        *create_command* is a function which accepts
        *texfile_name, outfilename, output_directory, context*
        (in this order) and returns a list
        made of the command and its arguments.
        Default command is::

            ["pdflatex", "-halt-on-error", "-interaction", "errorstopmode",
             "-output-directory", output_directory,
             texfile_name]

        If *create_command* is set, but is not callable,
        :exc:`lena.core.LenaTypeError` is raised.
        """
        self._overwrite = overwrite
        # todo: make verbose private.
        self.verbose = verbose
        if create_command and not callable(create_command):
            raise lena.core.LenaTypeError(
                "create_command must be callable, "
                "{} provided.".format(create_command)
            )
        self.create_command = create_command
        # Maps output pdf name -> (process, context) for running jobs.
        # OrderedDict was chosen because removing an element from it
        # is faster than from a list, and because it is natural
        # to iterate processes in FIFO order.
        self.processes = collections.OrderedDict()

    @staticmethod
    def _is_tex_file(context):
        """Return ``True`` if *context.output.filetype* is *"tex"*."""
        filetype = lena.context.get_recursively(
            context, "output.filetype", None
        )
        return filetype == "tex"

    @staticmethod
    def _pdf_name(texfile_name):
        """Return the name of the pdf produced from *texfile_name*.

        Only the trailing *.tex* extension is substituted,
        so that a *".tex"* met elsewhere in the path is left intact.
        """
        suffix = ".tex"
        if texfile_name.endswith(suffix):
            return texfile_name[:-len(suffix)] + ".pdf"
        # unusual name without a .tex extension: plain substitution
        return texfile_name.replace(suffix, ".pdf")

    def _print_output(self, stdoutdata, stderrdata, returncode):
        """Print output of a finished process according to *verbose*.

        Standard output is printed if the process failed
        (and *verbose* is set) or if *verbose* is more than 1.
        Non-empty standard error is printed whenever *verbose* is set.
        """
        if not self.verbose:
            return
        # In fact, it's not possible to distinguish stdout from stderr,
        # because pdflatex writes errors to stdout...
        # The data are bytes objects and need decoding to get newlines.
        # Undecodable bytes are replaced, so that an unusual encoding
        # of a log message does not abort the whole flow.
        if returncode or self.verbose > 1:
            print(stdoutdata.decode("utf-8", "replace"))
        if stderrdata:
            print(stderrdata.decode("utf-8", "replace"))

    def _pop_returned_processes(self):
        """Remove terminated processes from the pool.

        Yield *(filename, context)* for each process
        that finished with a zero return code.
        Failed processes are removed silently
        (apart from the output printed according to *verbose*).
        """
        # Iterate a snapshot of keys, because the pool is mutated.
        for filename in list(self.processes):
            process, context = self.processes[filename]
            returncode = process.poll()
            if returncode is None:
                # still running
                continue
            # The process has terminated: collect the output
            # of this very process (this also closes its pipes).
            stdoutdata, stderrdata = process.communicate()
            del self.processes[filename]
            self._print_output(stdoutdata, stderrdata, returncode)
            if not returncode:
                yield (filename, context)

    def _launch(self, texfile_name, outfilename, output_directory, context):
        """Spawn a process for *texfile_name* and add it to the pool."""
        if self.create_command:
            command = self.create_command(texfile_name, outfilename,
                                          output_directory, context)
        else:
            command = ["pdflatex", "-halt-on-error",
                       "-interaction", "errorstopmode",
                       "-output-directory", output_directory,
                       texfile_name]
        if self.verbose:
            print(" ".join(command))
        process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        self.processes[outfilename] = (process, context)

    def run(self, flow):
        """Convert all incoming LaTeX files to pdf.

        A *value* from *flow* corresponds to a TeX file
        if its *context.output.filetype* is *"tex"*.
        Other values pass unchanged.

        If the resulting pdf file exists and *context.output.changed*
        is set to ``False``, pdf rendering is not run.
        If *context.output.changed* is not set, then modification times
        for *.tex* and *.pdf* files are compared:
        if the template *.tex* is newer, it is reprocessed.
        Set the initialization argument *overwrite* to ``True``
        to always recreate pdfs.
        All non-existent files are always created.

        Yielded values for pdfs are tuples *(pdf file name, context)*
        with *context.output.filetype* set to *"pdf"*.
        Files for which the command returned a non-zero code
        are not yielded.

        On :exc:`KeyboardInterrupt` during the final wait,
        already finished pdfs are yielded,
        remaining processes are terminated and the flow ends.
        """
        for val in flow:
            # check for finished pdfs on each iteration
            for finished in self._pop_returned_processes():
                yield finished

            data, context = lena.flow.get_data_context(val)
            if not self._is_tex_file(context):
                yield val
                continue

            # no deepcopy, because it's a Run element
            outputc = context["output"]
            outputc["filetype"] = "pdf"
            texfile_name = data
            pdf_name = self._pdf_name(texfile_name)
            output_directory = os.path.dirname(texfile_name)

            try:
                changed = outputc["changed"]
            except KeyError:
                # if context.output.changed is missing, we compare times
                # for tex and pdf files.
                try:
                    pdf_time = os.path.getmtime(pdf_name)
                except os.error:
                    # no pdf yet; it will be created anyway
                    changed = True
                else:
                    changed = os.path.getmtime(texfile_name) > pdf_time

            if (not self._overwrite and os.path.exists(pdf_name)
                    and not changed):
                # pdf file exists and data is unchanged
                outputc["changed"] = False
                if self.verbose:
                    print("# file unchanged, LaTeXToPDF skips {}"
                          .format(texfile_name))
                yield (pdf_name, context)
            else:
                outputc["changed"] = True
                self._launch(texfile_name, pdf_name, output_directory,
                             context)

        # having read all data, wait for finishing processes
        for filename in list(self.processes):
            process, context = self.processes[filename]
            try:
                stdoutdata, stderrdata = process.communicate()
            except KeyboardInterrupt:
                print("Interrupting pdflatex...")
                if self.verbose:
                    print("Could not finish", (filename, context))
                print("Collecting finished pdfs...")
                for finished in self._pop_returned_processes():
                    yield finished
                # kill not finished processes
                for unfinished, _ in self.processes.values():
                    unfinished.terminate()
                self.processes.clear()
                # A plain return ends the generator;
                # raising StopIteration here would be converted
                # into RuntimeError (PEP 479).
                return
            # This process is fully handled: remove it at once,
            # so that it is never reported twice if the wait
            # for a later process is interrupted.
            del self.processes[filename]
            returncode = process.returncode
            self._print_output(stdoutdata, stderrdata, returncode)
            if not returncode:
                yield (filename, context)
        self.processes.clear()