"""Writer Module
Provides the `Writer` object which defines the basic mechanism for
writing the objects defined in `lexor.core.elements`. This involves
using objects derived from the abstract class `NodeWriter`. See
`lexor.core.dev` for more information on how to write objects derived
from `NodeWriter` to be able to write `Documents` in the way you
desire.
"""
import re
from cStringIO import StringIO
from lexor.command.lang import get_style_module
from lexor.command import config
RE = re.compile(" ")
def _replacer(*key_val):
"""Helper function for replace.
Source: <http://stackoverflow.com/a/15221068/788553>
"""
replace_dict = dict(key_val)
replacement_function = lambda match: replace_dict[match.group(0)]
pattern = re.compile("|".join([re.escape(k) for k, _ in key_val]), re.M)
return lambda string: pattern.sub(replacement_function, string)
[docs]def replace(string, *key_val):
"""Replacement of strings done in one pass. Example:
>>> replace("a < b && b < c", ('<', '<'), ('&', '&'))
'a < b && b < c'
Source: <http://stackoverflow.com/a/15221068/788553>
"""
return _replacer(*key_val)(string)
[docs]def find_whitespace(line, start, lim):
"""Attempts to find the index of the first whitespace before
lim, if its not found, then it looks ahead. """
index = line.rfind(' ', start, lim+1)
if index != -1:
return index
index = line.find(' ', lim)
if index != -1:
return index
return len(line)
[docs]class NodeWriter(object):
"""A node writer is an object which writes a node in three steps:
`start`, `data/child`, `end`.
"""
def __init__(self, writer):
"""A `NodeWriter` needs to be initialized with a writer
object. If this method is to be overloaded then make sure
that it only accepts one parameter: `writer`. This method is
used by `Writer` and it calls it with itself as the parameter.
"""
self.writer = writer
[docs] def write(self, string, split=False):
"""Writes the string to a file object. The file object is
determined by the `Writer` object that initialized this
object (`self`). """
self.writer.write_str(string, split)
[docs] def start(self, node):
"""Overload this method to write part of the `Node` object in
the first encounter with the `Node`. """
pass
[docs] def data(self, node):
"""This method gets called only by `CharacterData` nodes.
This method should be overloaded to write their attribute
`data`, otherwise it will write the node's data as it is. """
self.writer.write_str(node.data)
@classmethod
[docs] def child(cls, _):
"""This method gets called for `Elements` that have children.
If it gets overwritten then it will not traverse through
child nodes unless you return something other than None.
This method by default returns `True` so that the `Writer`
can traverse through the child nodes. """
return True
[docs] def end(self, node):
"""Overload this method to write part of the `Node` object in
the last encounter with the `Node`. """
pass
[docs]class DefaultWriter(NodeWriter):
"""If the language does not define a NodeWriter for __default__
then the writer will use this default writer.
"""
[docs] def start(self, node):
"""Write the start of the node as a xml tag. """
att = ' '.join(['%s="%s"' % (k, v) for k, v in node.items()])
if att != '':
self.write('<%s %s>' % (node.name, att))
else:
self.write('<%s>' % node.name)
[docs] def end(self, node):
"""Write the end of the node as an xml end tag. """
self.write('</%s>' % node.name)
# The default of 7 attributes for class is too restrictive.
# pylint: disable=R0902
[docs]class Writer(object):
"""To see the languages in which a `Writer` object is able to
write see the `lexor.lang` module. """
def __init__(self, lang='xml', style='default', defaults=None):
"""Create a new `Writer` by specifying the language and the
style in which `Node` objects will be written. """
if defaults is None:
defaults = dict()
self.defaults = defaults
self.style_module = None
self._lang = lang
self._style = style
self._filename = None
self._file = None # Points to a file object
self._nw = None # Array of NodeWriters
self._reload = True # Create new NodeWriters
self._raw = None
self._wrap = None
self._buffer = None
self._break_hint = None
self._indent = None
self._indent_empty = None
self.pos = None
self.width = None
self.root = None # The node to be written
self.prev_str = None # Reference to the last string printed
@property
[docs] def filename(self):
"""READ-ONLY: The name of the file to which a `Node` object
was last written to. """
return self._filename
@property
def language(self):
"""The language in which the `Writer` writes `Node` objects. """
return self._lang
@language.setter
[docs] def language(self, value):
"""_lang setter method. """
self._lang = value
self._reload = True
@property
def indent(self):
"""The indentation at the beginning of each line. """
return self._indent
@indent.setter
[docs] def indent(self, value):
"""_indent setter method. """
self.flush_buffer(tail=False)
self._indent = value
@property
def writing_style(self):
"""The style in which the `Writer` writes a `Node` object. """
return self._style
@writing_style.setter
[docs] def writing_style(self, value):
"""_style setter method. """
self._style = value
self._reload = True
@property
def string_buffer(self):
"""The current string buffer. This is the string that will
be printed after its length exceeds the writer's width. """
return self._buffer
@string_buffer.setter
[docs] def string_buffer(self, value):
"""_indent setter method. """
self._buffer = value
[docs] def set(self, lang, style, defaults=None):
"""Set the language and style in one call. """
self._style = style
self._lang = lang
self.defaults = defaults
self._reload = True
def __str__(self):
"""Attempts to retrieve the last written string. """
if self._filename is None and self._file is not None:
return self._file.getvalue()
if self._filename is not None:
tmp = open(self._filename, 'r')
val = tmp.read()
tmp.close()
return val
return None
def _write_str(self, string):
"""Helper function for write_str. """
if string != '':
self.prev_str = string
self._file.write(string)
nlines = string.count('\n')
self.pos[0] += nlines
if nlines > 0:
self.pos[1] = len(string) - string.rfind('\n')
else:
self.pos[1] += len(string)
[docs] def write_str(self, string, split=False):
"""The write function is meant to be used with Node objects.
Use this function to write simple strings while the file
descriptor is open. """
if self._raw:
self._write_str(string)
return
if not self._wrap:
if self._indent != '':
lines = string.split('\n')
if self.pos[1] == 1:
if lines[0] != '' or self._indent_empty:
lines[0] = self._indent + lines[0]
for num in range(1, len(lines)):
if lines[num] != '' or self._indent_empty:
lines[num] = self._indent + lines[num]
self._write_str('\n'.join(lines))
else:
self._write_str(string)
return
if split:
self._break_hint.append(string)
lines = string.split('\n')
num = 0
while num < len(lines) - 1:
self._buffer += lines[num]
self.normalize_buffer()
self.flush_buffer()
self._write_str('\n')
num += 1
self._buffer += lines[num]
self.normalize_buffer()
[docs] def flush_buffer(self, tail=True):
"""Empty the contents of the buffer. """
if not tail and self._buffer.endswith(' '):
self._buffer = self._buffer[:-1]
if self.pos[1] == 1:
if self._buffer.startswith(' '):
self._buffer = self._buffer[1:]
if self._buffer != '':
self._write_str(self._indent + self._buffer)
elif self._indent_empty:
self._write_str(self._indent)
else:
self._write_str(self._buffer)
self._buffer = ''
[docs] def normalize_buffer(self):
"""The term normalize means that the length of the buffer
will be less than or equal to the wrapping width. Anything
that exceeds the limit will be flushed. """
line = self._buffer
indent = self._indent
if self.pos[1] > 1:
indent = ''
limit = self.width - self.pos[1] - len(indent) + 1
while len(line) > limit:
start = 0
if line[start] == ' ':
start += 1
end = find_whitespace(line, start, limit)
while self._break_hint:
index = line.find(self._break_hint[0], start)
del self._break_hint[0]
if index > -1 and index <= limit:
if end > limit or index > end:
end = index
if end == len(line):
break
self._write_str(indent + line[start:end] + '\n')
if line[end:end+1] == ' ':
line = line[end+1:]
else:
line = line[end:]
indent = self._indent
limit = self.width - self.pos[1] - len(indent) + 1
self._buffer = line
[docs] def enable_wrap(self):
"""Use this to set the writing in wrapping mode. """
self._wrap = True
[docs] def disable_wrap(self):
"""Turn off wrapping. """
self.flush_buffer()
self._wrap = False
[docs] def enable_raw(self):
"""Use this to set the writing in raw mode. """
self.flush_buffer()
self._raw = True
[docs] def disable_raw(self):
"""Turn off raw mode. """
self._raw = False
[docs] def raw_enabled(self):
"""Determine if raw mode is enabled or not. """
return self._raw
[docs] def wrap_enabled(self):
"""Determine if wrap mode is enabled or not. """
return self._wrap
[docs] def endl(self, force=True, tot=1, tail=False):
"""Insert a new line character. By setting `force` to False
you may omit inserting a new line character if the last
character printed was already the new line character."""
prev_str = self.last()
self.flush_buffer(tail)
if force or (not prev_str.endswith('\n') and
prev_str != self._indent):
self._write_str('\n'*tot)
[docs] def last(self):
"""Returns the last written string with the contents of the
buffer. """
if self.pos[1] == 1 and self._buffer.startswith(' '):
return self.prev_str + self._buffer[1:]
return self.prev_str + self._buffer
[docs] def write(self, node, filename=None, mode='w'):
"""Write node to a file or string. To write to a string use
the default parameters, otherwise provide a file name. If
filename is provided you have the option of specifying the
mode: 'w' or 'a'.
You may also provide a file you may have opened yourself in
place of filename so that the writer writes to that file.
Use the __str__ function to retrieve the contents written to
a string.
"""
if isinstance(filename, file):
# Check for stdout, stderr
self._filename = file
self._file = filename
elif filename is None:
# Check for StringIO
self._filename = None
if self._file is not None:
self._file.close()
self._file = StringIO()
else:
self._filename = filename
self._file = open(filename, mode)
self.root = node
self._raw = True
self._wrap = False
self._buffer = ''
self._break_hint = []
self._indent = ''
self._indent_empty = False
self.pos = [1, 1]
self.width = 70
self.prev_str = '\n'
if self._reload:
self._set_node_writers(self._lang, self._style, self.defaults)
self._reload = False
self._set_node_writers_writer()
if hasattr(self.style_module, 'pre_process'):
self.style_module.pre_process(self, node)
self._write(node)
self.flush_buffer()
if hasattr(self.style_module, 'post_process'):
self.style_module.post_process(self, node)
if isinstance(filename, file):
pass
elif filename is not None:
self._file.close()
[docs] def close(self):
"""Close the file. """
if self._filename is not file:
self._file.close()
def _set_node_writers(self, lang, style, defaults=None):
"""Imports the correct module based on the language and
style. """
self.style_module = get_style_module('writer', lang, style)
name = '%s-writer-%s' % (lang, style)
config.set_style_cfg(self, name, defaults)
self._nw = dict()
self._nw['__default__'] = DefaultWriter(self)
nw_obj = NodeWriter(self)
self._nw['#document'] = nw_obj
self._nw['#document-fragment'] = nw_obj
self._nw['#text'] = nw_obj
self._nw['#entity'] = nw_obj
str_key = list()
for key, val in self.style_module.MAPPING.iteritems():
if isinstance(val, str):
str_key.append((key, val))
else:
self._nw[key] = val(self)
for key, val in str_key:
self._nw[key] = self._nw[val]
#@deprecated
[docs] def get_node_writer(self, name):
"""Return one of the NodeWriter objects available to the
Writer."""
return self._nw.get(name, self._nw['__default__'])
def __getitem__(self, name):
"""Return a Node parser. """
return self._nw.get(name, self._nw['__default__'])
def _set_node_writers_writer(self):
"""To be called before writing since the file will change. """
for key in self._nw:
self._nw[key].writer = self
def _write_start(self, node):
"""To be called during tree traversal when node is first
encountered. """
self._nw.get(node.name, self._nw['__default__']).start(node)
def _write_data(self, node):
"""To be called during tree traversal after _write_start if the
node is a `CharacterData`. """
self._nw.get(node.name, self._nw['__default__']).data(node)
def _write_child(self, node):
"""To be called during tree traversal after _write_start if the
node has children. """
return self._nw.get(node.name, self._nw['__default__']).child(node)
def _write_end(self, node):
"""To be called during tree traversal on last visit to node. """
self._nw.get(node.name, self._nw['__default__']).end(node)
def _get_direction(self, crt):
"""Returns the direction in which the traversal should go. """
if hasattr(crt, 'data'):
self._write_data(crt)
self._write_end(crt)
return 'r'
elif crt.child:
if self._write_child(crt) is None:
return 'r'
else:
return 'd'
else:
self._write_end(crt)
return 'r'
def _write(self, root):
"""To be called during actual write function. """
crt = root
direction = None
self._write_start(crt)
if hasattr(crt, 'data'):
self._write_data(crt)
self._write_end(crt)
return
if crt.child:
if self._write_child(crt) is None:
return
else:
direction = 'd'
else:
self._write_end(crt)
return
while True:
if direction is 'd':
crt = crt.child[0]
elif direction is 'r':
if crt.next is None:
direction = 'u'
continue
crt = crt.next
elif direction is 'u':
self._write_end(crt.parent)
if crt.parent is root:
break
if crt.parent.next is None:
crt = crt.parent
continue
crt = crt.parent.next
self._write_start(crt)
direction = self._get_direction(crt)