# Source code for lexor.core.converter

"""Converter Module

Provides the `Converter` object which defines the basic mechanism for
converting the objects defined in `lexor.core.elements`. This
involves using objects derived from the abstract class
`NodeConverter`.

"""

import sys
import os.path as pth
import traceback
from imp import load_source
from cStringIO import StringIO
from lexor.command import config
from lexor.command.lang import get_style_module, map_explanations
LC = sys.modules['lexor.core']


def get_converter_namespace():
    """Return the dictionary shared by all converters.

    Many converters may be defined during the conversion of a
    document. In some cases we may need to save references to objects
    in documents. If this is the case, then call this function to
    obtain the namespace where you can save those references.

    The same dictionary object is returned on every call; it is
    stored as the `namespace` attribute of this function.

    """
    return get_converter_namespace.namespace


# Create the shared dictionary only once so that re-executing this
# code does not discard references that were already stored.
if not hasattr(get_converter_namespace, 'namespace'):
    get_converter_namespace.namespace = dict()
class NodeConverter(object):
    """A node converter is an object which determines if the node
    will be copied (default). To avoid copying the node simply
    declare

        copy = False

    when deriving a node converter. Note that by default, the
    children of the node (if any) will be copied and assigned to the
    parent. To avoid copying the children then set

        copy_children = False

    """
    # Class-level switches read by the converter during traversal.
    copy = True
    copy_children = True

    def __init__(self, converter):
        """A `NodeConverter` needs to be initialized with a converter
        object. If this method is to be overloaded then make sure
        that it only accepts one parameter: `converter`. This method
        is used by `Converter` and it calls it with itself as the
        parameter. """
        self.converter = converter

    @classmethod
    def start(cls, node):
        """This method gets called only if `copy` is set to True
        (default). By overloading this method you can modify the
        node before its children are processed.

        DO NOT modify any of the parents of the node. If there is a
        need to modify any of the parents of the node then set a
        variable in the converter to point to the node so that later
        on in the `convert` function it can be modified.

        Returns the node (unchanged in this base implementation).

        NOTE(review): this is a classmethod, so the `converter`
        stored on the instance by `__init__` is not reachable through
        `cls` -- confirm whether an instance method was intended.

        """
        return node

    @classmethod
    def end(cls, node):
        """This method gets called after all the children have been
        copied. Make sure to return the node or the node
        replacement. """
        return node

    def msg(self, code, node, arg=None, uri=None):
        """Send a message to the converter.

        The message is forwarded to `self.converter.msg` tagged with
        the name of the module in which this node converter class
        was defined.

        """
        self.converter.msg(self.__module__, code, node, arg, uri)


# pylint: disable=R0903
class BaseLog(object):
    """A simple class to provide messages to a converter. You must
    derive an object from this class in the module which will be
    issuing the messages. For instance:

        class Log(BaseLog):
            pass

    After that you can create a new object and use it in a module.

        log = Log(converter)

    where `converter` is a `Converter` provided to the module. Make
    sure that the module contains the objects `MSG` and
    `MSG_EXPLANATION`.

    """

    def __init__(self, converter):
        """Store the converter that will receive the messages. """
        self.converter = converter

    def msg(self, code, arg=None, uri=None):
        """Send a message to the converter.

        No node is attached to the message (`None` is forwarded in
        its place). The message is tagged with the name of the
        module in which the derived class was defined, which is why
        the class has to be derived in the module issuing messages.

        """
        self.converter.msg(self.__module__, code, None, arg, uri)


# The default of 7 attributes for class is too restrictive.
# pylint: disable=R0902
class Converter(object):
    """To see the languages available to the `Converter` see the
    `lexor.lang` module.

    A converter keeps a stack of converted documents (`doc`) and a
    parallel stack of log documents (`log`); every call to `convert`
    pushes one entry onto each and `pop` removes them again.

    """

    def __init__(self, fromlang='xml', tolang='xml',
                 style='default', defaults=None):
        """Create a new `Converter` by specifying the language and the
        style in which `Node` objects will be written. """
        if defaults is None:
            defaults = dict()
        self._fromlang = fromlang
        self._tolang = tolang
        self._style = style
        # Maps node names to node converter instances.
        self._nc = None
        # Maps node converter class names to their instances.
        self._node_converter = None
        self._convert_func = None
        # When True the style module is (re)loaded on the next convert.
        self._reload = True
        self.style_module = None
        self.doc = list()
        self.log = list()
        self.defaults = defaults

    @property
    def convert_from(self):
        """The language from which the converter will convert. """
        return self._fromlang

    @convert_from.setter
    def convert_from(self, value):
        """Setter function for convert_from. """
        self._fromlang = value
        self._reload = True

    @property
    def convert_to(self):
        """The language to which the converter will convert. """
        return self._tolang

    @convert_to.setter
    def convert_to(self, value):
        """Setter function for convert_to. """
        self._tolang = value
        self._reload = True

    @property
    def converting_style(self):
        """The converter style. """
        return self._style

    @converting_style.setter
    def converting_style(self, value):
        """Setter function for converting_style. """
        self._style = value
        self._reload = True

    def set(self, fromlang, tolang, style, defaults=None):
        """Sets the languages and styles in one call. The current
        defaults are kept when `defaults` is None. """
        if defaults is not None:
            self.defaults = defaults
        self._style = style
        self._tolang = tolang
        self._fromlang = fromlang
        self._reload = True

    def match_info(self, fromlang, tolang, style, defaults=None):
        """Check to see if the converter main information matches.

        Returns True only when the languages and the style are equal
        to the ones of the converter and no `defaults` were given;
        providing `defaults` always yields False.

        """
        if defaults is not None:
            return False
        return (fromlang == self._fromlang and
                tolang == self._tolang and
                style == self._style)

    @property
    def lexor_log(self):
        """The `lexorlog` document. See this document after each
        call to `convert` to see warnings and errors. """
        return self.log[-1]

    @property
    def document(self):
        """The parsed document. This is a `Document` or
        `FragmentedDocument` created by the `convert` method. """
        return self.doc[-1]

    def pop(self):
        """Remove the last document and last log document and return
        them."""
        return self.doc.pop(), self.log.pop()

    def convert(self, doc, namespace=False):
        """Convert the `Document` doc.

        Returns a tuple with the converted document and its log
        document. Unless `namespace` is true the `namespace`
        attribute created during the conversion is removed from the
        converted document. Raises `TypeError` if `doc` is not a
        `Document` or `DocumentFragment`.

        """
        if not isinstance(doc, (LC.Document, LC.DocumentFragment)):
            raise TypeError("The node is not a Document or DocumentFragment")
        if self._reload:
            self._set_node_converters(
                self._fromlang, self._tolang, self._style, self.defaults
            )
            self._reload = False
        log = LC.Document("lexor", "log")
        log.modules = dict()
        log.explanation = dict()
        self.log.append(log)
        self._convert(doc)
        # Optional hook: the style module may post-process the result.
        if hasattr(self.style_module, 'convert'):
            self.style_module.convert(self, self.doc[-1])
        map_explanations(self.log[-1].modules, self.log[-1].explanation)
        if not namespace:
            del self.doc[-1].namespace
        return self.doc[-1], self.log[-1]

    @staticmethod
    def remove_node(node):
        """Removes the node from the current document it is in.
        Returns the previous sibling if possible, otherwise an empty
        text node is appended to the parent and the first child of
        the parent is returned.

        NOTE(review): when the removed node was the first child and
        other siblings remain, `parent[0]` is presumably the sibling
        that followed the node rather than the new empty text node
        -- confirm that this is intended.

        """
        # Read these first: they are taken from the node before it
        # is detached from its parent.
        parent = node.parent
        index = node.index
        del parent[index]
        if index > 0:
            try:
                return parent[index - 1]
            except IndexError:
                pass
        parent.append_child('')
        return parent[0]

    # pylint: disable=R0913
    def msg(self, mod_name, code, node, arg=None, uri=None):
        """Provide the name of module issuing the message, the code
        number, the node with the error, optional arguments and uri.
        This information gets stored in the log.

        If the node carries a `uri` attribute then it is moved to
        the message, otherwise `uri` (by default the `uri_` of the
        document being converted) is used.

        """
        if uri is None:
            uri = self.doc[-1].uri_
        if arg is None:
            arg = ()
        wnode = LC.Void('msg')
        wnode['module'] = mod_name
        wnode['code'] = code
        wnode['node_id'] = id(node)
        wnode.node = node
        try:
            wnode['uri'] = node['uri']
            del node['uri']
        except (KeyError, TypeError):
            # No `uri` attribute, or `node` does not support item
            # access (e.g. it is None).
            wnode['uri'] = uri
        wnode['arg'] = arg
        log = self.log[-1]
        if mod_name not in log.modules:
            log.modules[mod_name] = sys.modules[mod_name]
        log.append_child(wnode)

    def _set_node_converter(self, val):
        """Helper function to create a node converter and store it in
        a dictionary. If `val` is a string then the node converter
        already registered under that name is returned. """
        if isinstance(val, str):
            return self._node_converter[val]
        name = val.__name__
        self._node_converter[name] = val(self)
        return self._node_converter[name]

    def __getitem__(self, name):
        """Return a Node converter. """
        return self._node_converter[name]

    def _set_node_converters(self, fromlang, tolang, style, defaults=None):
        """Imports the correct module based on the languages and
        style. """
        self.style_module = get_style_module(
            'converter', fromlang, style, tolang
        )
        name = '%s-converter-%s-%s' % (fromlang, tolang, style)
        config.set_style_cfg(self, name, defaults)
        self._nc = dict()
        self._node_converter = dict()
        if hasattr(self.style_module, 'REPOSITORY'):
            for val in self.style_module.REPOSITORY:
                self._set_node_converter(val)
        self._nc['__default__'] = self._set_node_converter(NodeConverter)
        # Strings which are not names of registered node converters
        # refer to other keys of the mapping; resolve them once all
        # the other entries are in place.
        pending = list()
        for key, val in self.style_module.MAPPING.items():
            if isinstance(val, str) and val not in self._node_converter:
                pending.append((key, val))
            else:
                self._nc[key] = self._set_node_converter(val)
        for key, val in pending:
            self._nc[key] = self._nc[val]

    def _start(self, node):
        """Evaluate the start function of the node converter based on
        the name of the node. """
        return self._nc.get(node.name, self._nc['__default__']).start(node)

    def _end(self, node):
        """Evaluate the end function of the node converter based on
        the name of the node. """
        return self._nc.get(node.name, self._nc['__default__']).end(node)

    def _copy(self, node):
        """Return the copy attribute of the node converter. """
        return self._nc.get(node.name, self._nc['__default__']).copy

    def _copy_children(self, node):
        """Return the copy_children attribute of the node converter
        if it is false, otherwise return the children of the node. """
        tmp = self._nc.get(node.name, self._nc['__default__']).copy_children
        return tmp and node.child

    def _get_direction(self, crt):
        """Returns the direction in which the traversal should go:
        'd' (down) when the node has children that are to be copied
        and 'r' (right) otherwise. """
        if crt.child and self._copy_children(crt):
            return 'd'
        return 'r'

    def _clone_node(self, crt):
        """Clones the node if the node converter assigned to the node
        has the copy property set to True. Otherwise an empty text
        node takes its place. """
        if self._copy(crt):
            return crt.clone_node()
        return LC.Text('')

    def _convert(self, doc):
        """Main convert function.

        Walks `doc` without recursion while building the copy. The
        traversal moves down ('d') to the first child, right ('r')
        to the next sibling or up ('u') to the parent, and finishes
        once it moves up from a child of the document.

        """
        # A doc needs to be copied by default. You may prohibit
        # to copy the children, but there must be a document.
        crt = doc
        self.doc.append(doc.clone_node())
        self.doc[-1].namespace = dict()
        if hasattr(self.style_module, 'init_conversion'):
            self.style_module.init_conversion(self, self.doc[-1])
        crtcopy = self._start(self.doc[-1])
        if not self._copy_children(crt):
            return
        root = doc
        direction = 'd'
        while True:
            # The directions are compared with `==`: identity checks
            # on strings only work by accident of interning.
            if direction == 'd':
                crt = crt.child[0]
                clone = self._clone_node(crt)
                crtcopy.append_child(clone)
            elif direction == 'r':
                if crt.next is None:
                    direction = 'u'
                    continue
                crt = crt.next
                clone = self._clone_node(crt)
                crtcopy.parent.append_child(clone)
            elif direction == 'u':
                # All the children were copied: close the parent.
                crtcopy = self._end(crtcopy.parent)
                crtcopy.normalize()
                if crt.parent is root:
                    break
                if crt.parent.next is None:
                    # Nothing to the right of the parent, keep
                    # going up.
                    crt = crt.parent
                    continue
                crt = crt.parent.next
                clone = self._clone_node(crt)
                crtcopy.parent.append_child(clone)
            crtcopy = clone
            if self._copy(crt):
                crtcopy = self._start(crtcopy)
                direction = self._get_direction(crt)
            else:
                direction = 'r'

    def update_log(self, log, after=True):
        """Append the messages from a log document to the converters
        log. Note that this removes the children from log.

        If `after` is false the messages are placed before the
        messages already stored in the log of the converter.

        """
        modules = log.modules
        explanation = log.explanation
        current = self.log[-1]
        for mname in modules:
            if mname not in current.modules:
                current.modules[mname] = modules[mname]
            if mname not in current.explanation:
                current.explanation[mname] = explanation[mname]
        if after:
            current.extend_children(log)
        else:
            current.extend_before(0, log)

    @staticmethod
    def _sync_embedding_globals(namespace):
        """Point `__FILE__`, `__DIR__` and `__NODE__` of the embedding
        namespace to the converter on top of the `include.converter`
        stack, or set them to None if the stack is empty. """
        if include.converter:
            doc = include.converter[-1].doc[-1]
            namespace['__FILE__'] = pth.realpath(doc.uri)
            namespace['__DIR__'] = pth.dirname(namespace['__FILE__'])
            namespace['__NODE__'] = get_current_node()
        else:
            namespace['__FILE__'] = None
            namespace['__DIR__'] = None
            namespace['__NODE__'] = None

    def _run_embedded_code(self, node, id_num, namespace, error):
        """Execute `node.data` in `namespace` and return everything
        the code wrote to the standard output. """
        original_stdout = sys.stdout
        captured = StringIO()
        sys.stdout = captured
        try:
            try:
                # SECURITY: this runs arbitrary python code taken
                # from the document. Only convert trusted documents.
                exec(node.data, namespace)
            except BaseException:
                # Grab the traceback before anything else runs.
                trace = traceback.format_exc()
                self.msg(self.__module__, 'E100', node, [id_num])
                if error:
                    err_node = LC.Element('python_pi_error')
                    err_node['section'] = str(id_num)
                    err_node.append_child(LC.CData(trace))
                    node.parent.insert_before(node.index, err_node)
            # Read from our own buffer: the executed code may have
            # rebound `sys.stdout`.
            return captured.getvalue()
        finally:
            # Always give the real stdout back, even if reporting
            # the error failed.
            sys.stdout = original_stdout
            captured.close()

    # pylint: disable=W0122,E1103
    def exec_python(self, node, id_num, parser, error=True):
        """Executes the contents of the processing instruction. You
        must provide an id number identifying the processing
        instruction and a parser that will parse the output provided
        by the execution. The execution takes place in the namespace
        given by `get_lexor_namespace`.

        If `error` is True then any errors generated during the
        execution will be appended to the output of the document.

        The parsed output replaces the processing instruction and
        the value returned by `remove_node` for the instruction is
        returned.

        """
        namespace = get_lexor_namespace()
        get_current_node.current.append(node)
        include.converter.append(self)
        try:
            if '__NAMESPACE__' not in namespace:
                namespace['__NAMESPACE__'] = namespace
                namespace['import_module'] = import_module
                namespace['include'] = include
                namespace['echo'] = echo
            Converter._sync_embedding_globals(namespace)
            text = self._run_embedded_code(node, id_num, namespace, error)
            parser.parse(text)
            node.parent.extend_before(node.index, parser.doc)
            newnode = Converter.remove_node(node)
            if parser.log:
                # Bracket the messages of the parsed output.
                self.msg(self.__module__, 'W101', node, [id_num])
                self.update_log(parser.log)
                self.msg(self.__module__, 'W102', node, [id_num])
        finally:
            # Unwind the stacks even on failure so that an enclosing
            # embedding sees its own node and file again.
            get_current_node.current.pop()
            include.converter.pop()
            Converter._sync_embedding_globals(namespace)
        return newnode
def get_lexor_namespace():
    """The execution of python instructions take place in the
    namespace provided by this function.

    The same dictionary object is returned on every call; it is
    stored as the `namespace` attribute of this function.

    """
    return get_lexor_namespace.namespace


# Create the dictionary only once so that re-executing this code
# does not discard what python embeddings already defined.
if not hasattr(get_lexor_namespace, 'namespace'):
    get_lexor_namespace.namespace = dict()
def get_current_node():
    """Return the node on top of the stack of python embeddings
    currently being executed.

    The stack is the list `get_current_node.current`. Raises
    `IndexError` if no embedding is being executed, i.e. the stack
    is empty.

    """
    return get_current_node.current[-1]


# Stack of nodes; the last item belongs to the innermost embedding.
if not hasattr(get_current_node, 'current'):
    get_current_node.current = list()
def echo(node):
    """Allows the insertion of Nodes generated within python
    embeddings.

        <?python
        comment = PI('!--', 'This is a comment')
        echo(comment)
        ?>

    The content is inserted right before the node returned by
    `get_current_node`. Strings are wrapped in a `Text` node, the
    children of a `#document` node are adopted instead of the
    document itself, and the items of lists and tuples are echoed
    one by one. Any other object is treated as a container which
    loses its first item every time that item is echoed.

    """
    crt = get_current_node()
    if isinstance(node, str):
        crt.parent.insert_before(crt.index, LC.Text(node))
    elif isinstance(node, LC.Node):
        if node.name == '#document':
            crt.parent.extend_before(crt.index, node)
        else:
            crt.parent.insert_before(crt.index, node)
    elif isinstance(node, (list, tuple)):
        # Tuples are handled here as well: they never shrink, so the
        # draining loop below would never finish for them.
        for item in node:
            echo(item)
    else:
        # Presumably inserting `node[0]` somewhere else removes it
        # from `node`, so that this loop ends -- verify for new
        # container types.
        while node:
            echo(node[0])
def include(input_file, **keywords):
    """Inserts a file into the current node.

    Relative paths are resolved against the directory of the
    document being converted by the converter on top of the
    `include.converter` stack. The file is parsed, optionally
    converted, and placed right before the node returned by
    `get_current_node`.

    Recognized keywords (with their defaults):

        parser_style: 'default'
        parser_lang: None, use the extension of the file
        parser_defaults: None
        convert_style: 'default'
        convert_from: None, use `parser_lang`
        convert_to: None, do not convert the parsed document
        convert_defaults: None
        adopt: True, insert the children of the document instead
            of the document itself

    """
    parent_converter = include.converter[-1]
    if not input_file.startswith('/'):
        # `doc` is the stack of documents of the converter; the one
        # being converted is the last one.
        base_dir = pth.dirname(parent_converter.doc[-1].uri)
        input_file = pth.join(base_dir, input_file)
    info = {
        'parser_style': 'default',
        'parser_lang': None,
        'parser_defaults': None,
        'convert_style': 'default',
        'convert_from': None,
        'convert_to': None,
        'convert_defaults': None,
        'adopt': True,
    }
    info.update(keywords)
    if info['parser_lang'] is None:
        # Use the file extension (without the dot) as the language.
        extension = pth.splitext(pth.basename(pth.realpath(input_file)))[1]
        info['parser_lang'] = extension[1:]
    with open(input_file, 'r') as tmpf:
        text = tmpf.read()
    parser = LC.Parser(info['parser_lang'],
                       info['parser_style'],
                       info['parser_defaults'])
    parser.parse(text, input_file)
    if parser.log:
        parent_converter.update_log(parser.log)
    crt = get_current_node()
    if info['convert_to'] is not None:
        if info['convert_from'] is None:
            info['convert_from'] = info['parser_lang']
        converter = Converter(info['convert_from'],
                              info['convert_to'],
                              info['convert_style'],
                              info['convert_defaults'])
        # `converter.log` is the list of all log documents, while
        # `update_log` needs the log document of this conversion.
        doc, conversion_log = converter.convert(parser.doc)
        if conversion_log:
            parent_converter.update_log(conversion_log)
    else:
        doc = parser.doc
    if info['adopt']:
        crt.parent.extend_before(crt.index, doc)
    else:
        crt.parent.insert_before(crt.index, doc)


# Stack of converters executing python embeddings; the last item is
# the innermost one.
if not hasattr(include, 'converter'):
    include.converter = list()
def import_module(mod_path, mod_name=None):
    """Return a module from a path. If no name is provided then the
    name of the file loaded will be assigned to the name. When using
    relative paths, it will find the module relative to the file
    executing the python embedding.

    The extension `.py` is appended to `mod_path` when missing. The
    module is loaded with `imp.load_source`.

    """
    doc = include.converter[-1].doc[-1]
    if not mod_path.endswith('.py'):
        mod_path += '.py'
    if not mod_path.startswith('/'):
        mod_path = pth.join(pth.dirname(doc.uri), mod_path)
    if mod_name is None:
        # `mod_path` always ends with `.py` at this point.
        mod_name = pth.basename(mod_path)[:-3]
    return load_source(mod_name, mod_path)
# Messages issued by this module, indexed by their code. The `{0}`
# placeholder is the id number of the python processing instruction.
MSG = {
    'E100': 'errors in python processing instruction section `{0}`',
    'W101': '--> begin ?python section `{0}` messages',
    'W102': '--> end ?python section `{0}` messages',
}
# Explanations for the messages above. NOTE(review): presumably the
# first entry explains E100 and the second one W101/W102 -- confirm
# how `map_explanations` pairs them with the codes.
MSG_EXPLANATION = [
    """
    - This message is being shown because of E100.

    - The python processing instructions has mistakes. See the
      traceback generated to fix the errors.

    - If the traceback is not shown in the document it may be due
      to the option `error` being off.

""",
    """
    - Python embeddings may generate output to be adapted to the
      document. Such output also needs to be processed. When the
      output generates errors these errors get appended to the
      converter log document.

    - All messages between W101 and W102 are are simply errors of
      the parsed output.

""",
]