"""
This module defines the elements of the document object model (DOM).
This implementation follows most of the recommendations of w3_.

.. _w3: http://www.w3.org/TR/2012/WD-dom-20121206/

Inheritance Tree
----------------

| :class:`lexor.core.node.Node` (``__builtin__.object``)
|     :class:`.CharacterData`
|         :class:`.Text`
|         :class:`.ProcessingInstruction`
|         :class:`.Comment`
|         :class:`.CData`
|         :class:`.Entity`
|         :class:`.DocumentType`
|     :class:`.Element`
|         :class:`.RawText` (:class:`.Element`, :class:`.CharacterData`)
|         :class:`.Void`
|         :class:`.Document`
|             :class:`.DocumentFragment`

----------------------------------------------------------------------

"""
import os
import sys
from lexor.core import Node
LC = sys.modules['lexor.core']
# pylint: disable=too-many-public-methods
# pylint: disable=too-many-instance-attributes
class CharacterData(Node):
    """A simple interface to deal with strings.

    The string is kept in the ``data`` attribute and is also reachable
    through the ``node_value`` property.
    """

    # Must be a real one-element tuple: ``('data')`` is just the string
    # ``'data'``, so anything iterating ``__slots__`` would receive the
    # individual characters instead of the slot name.
    __slots__ = ('data',)

    def __init__(self, text=''):
        """Set the data property to the value of `text` and set its
        name to ``'#character-data'``.

        :param text: initial content stored in ``data``.
        """
        Node.__init__(self)
        self.name = '#character-data'
        self.data = text

    @property
    def node_value(self):
        """Return or set the value of the node. This property is a
        wrapper for the ``data`` attribute. """
        return self.data

    @node_value.setter
    def node_value(self, value):
        """Setter function for data attribute. """
        self.data = value
class Text(CharacterData):
    """A node holding a plain string."""

    __slots__ = ()

    def __init__(self, text=''):
        """Initialize through :class:`CharacterData` with `text` and
        then name the node ``'#text'``."""
        CharacterData.__init__(self, text)
        # The base constructor assigns '#character-data'; override it.
        self.name = '#text'

    def clone_node(self, _=True):
        """Build and return another ``Text`` node carrying the same
        ``data``. The positional argument is ignored. """
        duplicate = Text(self.data)
        return duplicate
class ProcessingInstruction(CharacterData):
    """Represents a "processing instruction", used to keep
    processor-specific information in the text of the document. """

    # Must be a real one-element tuple: ``('_target')`` is only the
    # string ``'_target'``.
    __slots__ = ('_target',)

    def __init__(self, target, data=''):
        """Create a processing instruction whose node name and target
        are both `target` and whose ``data`` is set to `data`.

        :param target: target of the instruction; also used as the
            node name.
        :param data: content of the instruction.
        """
        CharacterData.__init__(self, data)
        self.name = target
        self._target = target

    @property
    def target(self):
        """The target of this processing instruction."""
        return self._target

    @target.setter
    def target(self, new_target):
        """Set the target and keep the node name in sync with it. """
        self.name = new_target
        self._target = new_target

    def clone_node(self, _=True):
        """Returns a new PI with the same target and data content.
        The positional argument is ignored. """
        return ProcessingInstruction(self._target, self.data)
class CData(CharacterData):
    """Although this node has been deprecated from the DOM_, it
    seems that xml still uses it.

    .. _DOM: https://developer.mozilla.org/en-US/docs/Web/API/Node.nodeType

    """

    __slots__ = ()

    def __init__(self, data=''):
        """Initialize through :class:`CharacterData` with `data` and
        then name the node ``'#cdata-section'``."""
        CharacterData.__init__(self, data)
        # The base constructor assigns '#character-data'; override it.
        self.name = '#cdata-section'

    def clone_node(self, _=True):
        """Build and return another ``CData`` node carrying the same
        ``data``. The positional argument is ignored. """
        duplicate = CData(self.data)
        return duplicate
class Entity(CharacterData):
    """From merriam-webster definition_:

    - *Something that exists by itself*.
    - *Something that is separate from other things*.

    This node acts in the same way as a :class:`.Text` node but it
    has one main difference. The data it contains should contain no
    white spaces. This node should be reserved for special characters
    or words that have different meanings across different languages.
    For instance in HTML you have the ``&amp;`` to represent ``&``.
    In LaTeX you have to type ``\\$`` to represent ``$``. Using this
    node will help you handle these Entities hopefully more
    efficiently than simply finding and replacing them in a Text node.

    .. _definition: http://www.merriam-webster.com/dictionary/entity

    """

    __slots__ = ()

    def __init__(self, text=''):
        """Initialize through :class:`CharacterData` with `text` and
        then name the node ``'#entity'``."""
        CharacterData.__init__(self, text)
        # The base constructor assigns '#character-data'; override it.
        self.name = '#entity'

    def clone_node(self, _=True):
        """Build and return another ``Entity`` carrying the same
        ``data``. The positional argument is ignored. """
        duplicate = Entity(self.data)
        return duplicate
class DocumentType(CharacterData):
    """A node to store the doctype declaration. This node will not
    follow the specifications at this point (May 30, 2013). It will
    simply receive the string in between ``<!doctype`` and ``>``.

    Specs: http://www.w3.org/TR/2012/WD-dom-20121206/#documenttype

    """

    __slots__ = ()

    def __init__(self, data=''):
        """Initialize through :class:`CharacterData` with `data` and
        then name the node ``'#doctype'``. """
        CharacterData.__init__(self, data)
        self.name = '#doctype'
        # The next properties should be obtained from data
        # The specs do not mention type, instead they mention name.
        # The node name here is #doctype so that it can be easily
        # identified. The doctype "name" as they refer is called type.

    def clone_node(self, _=True):
        """Build and return another doctype node carrying the same
        ``data``. The positional argument is ignored. """
        return DocumentType(self.data)
class Element(Node):
    """Node object configured to have child Nodes and attributes.

    Attribute values live in the instance ``__dict__``; the order in
    which attribute names were added is tracked in ``_order``.
    """

    def __init__(self, name, data=None):
        """The parameter ``data`` should be a ``dict`` object. The
        element will use the keys and values to populate its
        attributes. You may modify the elements internal dictionary.
        However, this may unintentionally overwrite the attributes
        defined by the ``__setitem__`` method. If you wish to add
        another attribute to the ``Element`` object use the
        convention of adding an underscore at the end of the
        attribute. i.e

            >>> strong = Element('strong')
            >>> strong.message_ = 'An internal message'
            >>> strong['message'] = 'Attribute message'

        :param name: the node name of the element.
        :param data: ``dict`` of attributes, or an iterable of
            ``(key, value)`` pairs; ``None`` means no attributes.
        """
        Node.__init__(self)
        if data is None:
            data = dict()
        self.__dict__.update(data)
        if isinstance(data, dict):
            # Copy the keys into a real list: on Python 3 ``keys()``
            # is a view that has no ``append``/``remove``/``index``.
            self._order = list(data.keys())
        else:
            self._order = list()
            for key, _ in data:
                if key not in self._order:
                    self._order.append(key)
        self.name = name
        self.child = list()

    def __call__(self, selector):
        """Return a :class:`lexor.core.selector.Selector` object. """
        return LC.Selector(selector, self)

    def update_attributes(self, node):
        """Copies the attributes of the input node into the calling
        node. """
        for k in node:
            self.__dict__[k] = node.__dict__[k]
            if k not in self._order:
                self._order.append(k)

    def __getitem__(self, k):
        """Return the `k`-th child of this node if `k` is an integer.
        Otherwise return the attribute of name with value of `k`.
        ``None`` is returned for a non-string `k` when the element
        has no children.

            >>> x.__getitem__(k) is x[k]
            True

        """
        if isinstance(k, str):
            return self.__dict__[k]
        if self.child:
            return self.child[k]
        return None

    def get(self, k, val=''):
        """Return the attribute of name with value of `k`, or `val`
        when there is no such entry."""
        return self.__dict__.get(k, val)

    def __setitem__(self, k, val):
        """Overloaded array operator. Appends or modifies an
        attribute. See its base method
        :meth:`lexor.core.node.Node.__setitem__` for documentation on
        when `k` is not a string.

            >>> x.__setitem__(attname, 'att') <==> x[attname] = 'att'

        """
        if isinstance(k, str):
            self.__dict__[k] = val
            if k not in self._order:
                self._order.append(k)
            if k == 'id' and self.owner:
                # Register under the id *value*: that is the key
                # ``Document.get_element_by_id`` looks up. Indexing by
                # ``k`` would always write to the literal key 'id'.
                self.owner.id_dict[val] = self
        else:
            Node.__setitem__(self, k, val)

    def __delitem__(self, k):
        """Remove a child or attribute.

            >>> x.__delitem__(k) <==> del x[k]

        """
        if isinstance(k, str):
            self.__dict__.__delitem__(k)
            self._order.remove(k)
        else:
            Node.__delitem__(self, k)

    def __contains__(self, obj):
        """Return ``True`` if `obj` is a node and it is a child
        of this element or if `obj` is an attribute of this
        element. Return ``False`` otherwise.

            >>> x.__contains__(obj) == obj in x
            True

        """
        if isinstance(obj, Node):
            return self.child.__contains__(obj)
        return self._order.__contains__(obj)

    def contains(self, obj):
        """Unlike ``__contains__``, this method returns ``True`` if
        `obj` is any of the descendants of the node. """
        if obj.level < self.level + 1:
            return False
        # Climb from `obj` until reaching the level of this element's
        # direct children, then test for membership.
        while obj.level > self.level + 1:
            obj = obj.parent
            if obj is None:
                return False
        return obj in self

    def __iter__(self):
        """Iterate over the element attributes names.

            >>> for attribute_name in node: ...

        """
        for k in self._order:
            yield k

    @property
    def attlen(self):
        """The number of attributes. """
        return len(self._order)

    @property
    def attributes(self):
        """Return a list of the attribute names in the element. """
        return list(self._order)

    @property
    def values(self):
        """Return a list of the attribute values in the Element. """
        return [self.__dict__[k] for k in self._order]

    def attribute(self, index):
        """Return the name of the attribute at the specified index. """
        return self._order[index]

    def attr(self, index):
        """Return the value of the attribute at the specified index.
        """
        return self.__dict__[self._order[index]]

    def items(self):
        """Return a list of ``(name, value)`` pairs, one per
        attribute, in attribute order. """
        # ``list`` keeps the return type identical on Python 2 and 3.
        return list(zip(self._order, self.values))

    def update(self, dict_):
        """update with the values of `dict_`. useful when the element
        is empty and you created an Attr object. then just update the
        values."""
        for key, val in dict_.items():
            self[key] = val

    def rename(self, old_name, new_name):
        """Renames an attribute. `old_name` may be the current name
        of the attribute or its index.

            >>> from lexor.core.elements import Element
            >>> node = Element('div')
            >>> node['att1'] = 'val1'
            >>> node
            div[0x10a090750 att1="val1"]:
            >>> node.rename('att1', 'new-att-name')
            >>> node
            div[0x10a090750 new-att-name="val1"]:

        """
        if isinstance(old_name, str):
            index = self._order.index(old_name)
        else:
            index = old_name  # Assume old_name is an index
        current = self._order[index]
        if current == new_name:
            # Nothing to do. Copying and then deleting the same key
            # would drop the value while leaving the name in _order.
            return
        self.__dict__[new_name] = self.__dict__.pop(current)
        self._order[index] = new_name

    def clone_node(self, deep=False, normalize=True):
        """Returns a new node. When deep is True, it will also
        clone all the child nodes.

        :param deep: when ``False`` (or when there are no children)
            only the element and its attributes are copied.
        :param normalize: when true, ``normalize()`` is called on
            each cloned parent once the walk moves back up to it.
        """
        # May want to provide a node to which the clone will be
        # appended to. If this is done then we will not have to
        # traverse through all the elements of the node to adjust
        # the level of the child nodes when we move the node around
        node = Element(self.name)
        node.update_attributes(self)
        if deep is False or not self.child:
            return node
        # Iterative walk: `crt` moves through the original tree while
        # `crtcopy` tracks the matching position in the copy.
        # Directions: 'd' = down, 'r' = right (next sibling), 'u' = up.
        crt = self
        crtcopy = node
        direction = 'd'
        while True:
            if direction == 'd':
                crt = crt.child[0]
                clone = crt.clone_node()
                crtcopy.append_child(clone)
            elif direction == 'r':
                if crt.next is None:
                    direction = 'u'
                    continue
                crt = crt.next
                clone = crt.clone_node()
                crtcopy.parent.append_child(clone)
            elif direction == 'u':
                crtcopy = crtcopy.parent
                if normalize:
                    crtcopy.normalize()
                if crt.parent is self:
                    break
                if crt.parent.next is None:
                    crt = crt.parent
                    continue
                crt = crt.parent.next
                clone = crt.clone_node()
                crtcopy.parent.append_child(clone)
            crtcopy = clone
            if crt.child:
                direction = 'd'
            else:
                direction = 'r'
        return node

    def get_elements_by_class_name(self, classname):
        """Return a list of all child elements which have all of the
        given class names.

        :param classname: whitespace separated class names.
        """
        nodes = []
        if not self.child:
            return nodes
        patterns = set(classname.split())
        # Same walk as in ``clone_node``:
        # 'd' = down, 'r' = right (next sibling), 'u' = up.
        crt = self
        direction = 'd'
        while True:
            if direction == 'd':
                crt = crt.child[0]
            elif direction == 'r':
                if crt.next is None:
                    direction = 'u'
                    continue
                crt = crt.next
            elif direction == 'u':
                if crt.parent is self:
                    break
                if crt.parent.next is None:
                    crt = crt.parent
                    continue
                crt = crt.parent.next
            if isinstance(crt, Element) and 'class' in crt:
                if patterns.issubset(set(crt['class'].split())):
                    nodes.append(crt)
            if crt.child:
                direction = 'd'
            else:
                direction = 'r'
        return nodes

    def children(self, children=None, **keywords):
        """Set the elements children by providing a list of nodes or
        a string. If using a string then you may provide any of the
        following keywords to dictate how to parse and convert:

            - parser_style: ``'_'``
            - parser_lang: ``'html'``
            - parser_defaults: ``None``,
            - convert_style: ``'_'``,
            - convert_from: ``None``,
            - convert_to: ``'html'``,
            - convert_defaults: ``None``,
            - convert: ``'false'``

        If no children are provided then it returns a string of the
        children written in plain html. To change this behavior
        provide the following keywords:

            - writer_style: ``'plain'``
            - writer_lang: ``'html'``

        .. important::

            This requires the installation of lexor styles.

        """
        if children is None:
            lang = keywords.get('writer_lang', 'html')
            style = keywords.get('writer_style', 'plain')
            writer = LC.Writer(lang, style)
            # ``items()`` exists on Python 2 and 3 (``iteritems`` does
            # not). Keywords are applied last so they take precedence
            # over the owner's defaults.
            if self.owner is not None and self.owner.defaults is not None:
                for var, val in self.owner.defaults.items():
                    writer.defaults[var] = os.path.expandvars(str(val))
            for var, val in keywords.items():
                writer.defaults[var] = os.path.expandvars(str(val))
            pieces = []
            for child in self.child:
                writer.write(child)
                pieces.append(str(writer))
            writer.close()
            return ''.join(pieces)
        if isinstance(children, str):
            info = {
                'parser_style': '_',
                'parser_lang': 'html',
                'parser_defaults': None,
                'convert_style': '_',
                'convert_from': None,
                'convert_to': 'html',
                'convert_defaults': None,
                'convert': 'false',
            }
            info.update(keywords)
            parser = LC.Parser(info['parser_lang'],
                               info['parser_style'],
                               info['parser_defaults'])
            parser.parse(children)
            if info['convert'] == 'true' and info['convert_to'] is not None:
                if info['convert_from'] is None:
                    info['convert_from'] = info['parser_lang']
                converter = LC.Converter(info['convert_from'],
                                         info['convert_to'],
                                         info['convert_style'],
                                         info['convert_defaults'])
                converter.convert(parser.doc)
                children = converter.doc.pop()
                converter.log.pop()
            else:
                children = parser.doc
            # NOTE(review): assumed to apply to both branches above;
            # confirm against the upstream source.
            children.temporary = True
        self.remove_children()
        self.extend_children(children)
class RawText(Element, CharacterData):
    """A few elements do not have children, instead they have data.
    Such elements exist in HTML: ``script``, ``title`` among
    others."""

    def __init__(self, name, data='', att=None):
        """Initialize as character data holding `data`, then as an
        element called `name` with the attributes in `att` (which may
        be a ``dict``), and finally set ``child`` to ``None``. """
        CharacterData.__init__(self, data)
        Element.__init__(self, name, att)
        # Element.__init__ created an empty child list; replace it.
        self.child = None

    def clone_node(self, deep=True, normalize=True):
        """Return a new ``RawText`` element with the same name and
        attributes. The ``data`` is copied only when `deep` is
        ``True``; `normalize` is not used. """
        duplicate = RawText(self.name)
        duplicate.update_attributes(self)
        if deep is not True:
            return duplicate
        duplicate.data = self.data
        return duplicate
class Void(Element):
    """An element with no children. """

    def __init__(self, name, att=None):
        """Initialize as an element called `name` with the attributes
        in `att` (which may be a `dict`) and set ``child`` to
        ``None``. """
        Element.__init__(self, name, att)
        # Element.__init__ created an empty child list; replace it.
        self.child = None

    def clone_node(self, _=True, normalize=True):
        """Return a new ``Void`` element with the same name and
        attributes. Neither argument is used. """
        duplicate = Void(self.name)
        duplicate.update_attributes(self)
        return duplicate
class Document(Element):
    """Contains information about the document that it holds. """

    def __init__(self, lang='xml', style='default'):
        """Create a document named ``#document`` that is its own
        owner, sits at level ``-1`` and records `lang` and `style`."""
        Element.__init__(self, '#document')
        self.level = -1
        self.owner = self
        self.lang = lang
        self.style = style
        self.uri_ = None
        self.defaults = None
        self.id_dict = {}
        self.meta = {}
        self.temporary = True

    def clone_node(self, deep=False, normalize=True):
        """Return a new Document with the same language, style,
        attributes, uri and a copy of ``meta``. Children are cloned
        only when `deep` is not ``False``. Note: it does not copy
        the default values. """
        duplicate = Document(self.lang, self.style)
        duplicate.update_attributes(self)
        duplicate.uri_ = self.uri_
        duplicate.meta.update(self.meta)
        if deep is not False and self.child:
            # Clone the subtree through the Element implementation and
            # hand the resulting container to extend_children.
            holder = Element.clone_node(self, deep, normalize)
            holder.name = ''  # not a document
            duplicate.extend_children(holder)
        return duplicate

    @property
    def language(self):
        """The current document's language. This property is used by
        the writer to determine how to write the document.

        This property is a wrapper for the ``lang`` attribute. """
        return self.lang

    @language.setter
    def language(self, val):
        """Store `val` in the ``lang`` attribute. """
        self.lang = val

    @property
    def writing_style(self):
        """The current document's style. This property is used by
        the writer to determine how to write the document.

        This property is a wrapper for the ``style`` attribute.
        """
        return self.style

    @writing_style.setter
    def writing_style(self, val):
        """Store `val` in the ``style`` attribute. """
        self.style = val

    @property
    def uri(self):
        """The Uniform Resource Identifier. This property may become
        useful if the document represents a file. This property
        should be set by a :class:`~lexor.core.parser.Parser`
        object telling us the location of the file that it parsed
        into the Document object. """
        return self.uri_

    @staticmethod
    def create_element(tagname, data=None):
        """Utility function to avoid having to import
        ``lexor.core.elements`` module. Returns an element object. """
        element = Element(tagname, data)
        return element

    def get_element_by_id(self, element_id):
        """Return the entry registered in ``id_dict`` under
        `element_id`, or None if there is none. """
        return self.id_dict.get(element_id)
class DocumentFragment(Document):
    """Takes in an element and "steals" its children. This element
    should only be used as a temporary container. Note that the
    ``__str__`` method may not yield the expected results since all
    the function will do is use the ``__str__`` method in each of its
    children. First assign this object to an actual Document. """

    def __init__(self, lang='xml', style='default'):
        """Initialize as a :class:`Document` with `lang` and `style`
        and rename the node to ``#document-fragment``. """
        Document.__init__(self, lang, style)
        self.name = '#document-fragment'

    def append_child(self, new_child):
        """Adds the node new_child to the end of the list of children
        of this node. The children contained in a
        ``DocumentFragment`` only have a parent (the
        ``DocumentFragment``). As opposed as
        :meth:`lexor.core.node.Node.append_child` which also takes
        care of the ``prev`` and ``next`` attributes.

        A string is wrapped in a :class:`Text` node first; anything
        else that is not a ``Node`` raises ``TypeError``. Returns the
        appended node. """
        if isinstance(new_child, str):
            new_child = Text(new_child)
        elif not isinstance(new_child, Node):
            raise TypeError("Only Nodes can be appended.")
        former_parent = new_child.parent
        if former_parent is not None:
            # Detach the node from wherever it currently lives.
            del former_parent[new_child.index]
        self.child.append(new_child)
        new_child.parent = self
        new_child.owner = self
        return new_child

    def __repr__(self):
        """Concatenate the ``repr`` of every child.

            >>> x.__repr__() <==> repr(x)

        """
        return ''.join(repr(node) for node in self.child)

    def __str__(self):
        """Concatenate the ``str`` of every child.

            >>> x.__str__() <==> str(x)

        """
        return ''.join(str(node) for node in self.child)