# Source code for lexor.core.elements

"""
This module defines the elements of the document object model (DOM).
This implementation follows most of the recommendations of w3_.

.. _w3: http://www.w3.org/TR/2012/WD-dom-20121206/

Inheritance Tree
----------------

|   :class:`lexor.core.node.Node` (``__builtin__.object``)
|        :class:`.CharacterData`
|             :class:`.Text`
|             :class:`.ProcessingInstruction`
|             :class:`.Comment`
|             :class:`.CData`
|             :class:`.Entity`
|             :class:`.DocumentType`
|        :class:`.Element`
|             :class:`.RawText` (:class:`.Element`, :class:`.CharacterData`)
|             :class:`.Void`
|             :class:`.Document`
|                  :class:`.DocumentFragment`

----------------------------------------------------------------------

"""
import os
import sys
from lexor.core import Node
# Handle to the already-imported ``lexor.core`` package. The classes
# below reach ``Selector``, ``Writer``, ``Parser`` and ``Converter``
# through it at call time (presumably to sidestep a circular import
# between this module and ``lexor.core`` -- TODO confirm).
LC = sys.modules['lexor.core']
# pylint: disable=too-many-public-methods
# pylint: disable=too-many-instance-attributes


class CharacterData(Node):
    """A simple interface to deal with strings.

    The string is kept in the ``data`` attribute. The ``node_value``
    property is a thin read/write wrapper around that attribute.

    """

    # A one-element tuple needs the trailing comma; ``('data')`` is
    # just the string ``'data'``.
    __slots__ = ('data',)

    def __init__(self, text=''):
        """Set the data property to the value of `text` and set its
        name to ``'#character-data'``. """
        Node.__init__(self)
        self.name = '#character-data'
        self.data = text

    @property
    def node_value(self):
        """Return or set the value of the node. This property is a
        wrapper for the ``data`` attribute. """
        return self.data

    @node_value.setter
    def node_value(self, value):
        """Setter function for data attribute. """
        self.data = value


class Text(CharacterData):
    """A node to represent a string object.

    It behaves like :class:`.CharacterData` but is named ``'#text'``.

    """

    __slots__ = ()

    def __init__(self, text=''):
        """Call its base constructor with `text` and set its name to
        ``'#text'``. """
        CharacterData.__init__(self, text)
        self.name = '#text'

    def clone_node(self, _=True):
        """Return a new ``Text`` node with the same data content.

        The positional parameter is accepted only for signature
        compatibility with the other ``clone_node`` methods; it is
        ignored.

        """
        return Text(self.data)


class ProcessingInstruction(CharacterData):
    """Represents a "processing instruction", used to keep
    processor-specific information in the text of the document.

    The node name always mirrors the target: both are set together in
    the constructor and in the ``target`` setter.

    """

    # A one-element tuple needs the trailing comma; ``('_target')`` is
    # just the string ``'_target'``.
    __slots__ = ('_target',)

    def __init__(self, target, data=''):
        """Create a processing instruction whose name and target are
        `target` and whose ``data`` is set to `data`. """
        CharacterData.__init__(self, data)
        self.name = target
        self._target = target

    @property
    def target(self):
        """The target of this processing instruction."""
        return self._target

    @target.setter
    def target(self, new_target):
        """Set the target and rename the node to `new_target`. """
        self.name = new_target
        self._target = new_target

    def clone_node(self, _=True):
        """Return a new ``ProcessingInstruction`` with the same target
        and data content. The positional parameter is ignored. """
        return ProcessingInstruction(self._target, self.data)


class Comment(CharacterData):
    """A node to store comments.

    Besides the comment text (``data``) the node carries a ``type``
    attribute, exposed through the ``comment_type`` property.

    """

    # A one-element tuple needs the trailing comma; ``('type')`` is
    # just the string ``'type'``.
    __slots__ = ('type',)

    def __init__(self, data=''):
        """Create a comment node named ``'#comment'`` holding `data`.
        Its type starts out as ``None``. """
        CharacterData.__init__(self, data)
        self.name = '#comment'
        self.type = None

    @property
    def comment_type(self):
        """Type of comment. This property is meant to help with
        documents that support different styles of comments. """
        return self.type

    @comment_type.setter
    def comment_type(self, comment_type):
        """Setter function for comment_type. """
        self.type = comment_type

    def clone_node(self, _=True):
        """Return a new comment with the same data content and the
        same type. The positional parameter is ignored. """
        node = Comment(self.data)
        node.type = self.type
        return node


class CData(CharacterData):
    """Although this node has been deprecated from the DOM_, it
    seems that xml still uses it.

    .. _DOM: https://developer.mozilla.org/en-US/docs/Web/API/Node.nodeType

    """

    __slots__ = ()

    def __init__(self, data=''):
        """Create a CDATA node holding `data` and set the node name
        to ``'#cdata-section'``. """
        CharacterData.__init__(self, data)
        self.name = '#cdata-section'

    def clone_node(self, _=True):
        """Return a new ``CData`` node with the same data content.
        The positional parameter is ignored. """
        return CData(self.data)


class Entity(CharacterData):
    """From merriam-webster definition_:

    - *Something that exists by itself*.
    - *Something that is separate from other things*.

    This node acts in the same way as a :class:`.Text` node but it
    has one main difference. The data it contains should contain no
    white spaces. This node should be reserved for special characters
    or words that have different meanings across different languages.
    For instance in HTML you have the ``&amp;`` to represent ``&``.
    In LaTeX you have to type ``\\$`` to represent ``$``. Using this
    node will help you handle these Entities hopefully more
    efficiently than simply finding and replacing them in a Text node.

    Note that the "no white spaces" rule is a convention only; the
    constructor does not validate `text`.

    .. _definition: http://www.merriam-webster.com/dictionary/entity

    """

    __slots__ = ()

    def __init__(self, text=''):
        """Create an ``Entity`` node holding `text` and set the node
        name to ``#entity``. """
        CharacterData.__init__(self, text)
        self.name = '#entity'

    def clone_node(self, _=True):
        """Return a new ``Entity`` with the same data content. The
        positional parameter is ignored. """
        return Entity(self.data)


class DocumentType(CharacterData):
    """A node to store the doctype declaration. This node will not
    follow the specifications at this point (May 30, 2013). It will
    simply receive the string in between ``<!doctype`` and ``>``.

    Specs: http://www.w3.org/TR/2012/WD-dom-20121206/#documenttype

    """
    __slots__ = ()

    def __init__(self, data=''):
        """Create a ``DocumentType`` node holding `data` and set its
        name to ``#doctype``. """
        CharacterData.__init__(self, data)
        self.name = '#doctype'
        # The next properties should be obtained from data
        # The specs do not mention type, instead they mention name.
        # The node name here is #doctype so that it can be easily
        # identified. The doctype "name" as they refer is called type.

    def clone_node(self, _=True):
        """Return a new doctype with the same data content. The
        positional parameter is ignored. """
        return DocumentType(self.data)


class Element(Node):
    """Node object configured to have child Nodes and attributes.

    Attribute values are stored directly in the instance ``__dict__``
    and the attribute names are kept, in insertion order, in the
    ``_order`` list. Child nodes are kept in the ``child`` list.

    """

    def __init__(self, name, data=None):
        """The parameter ``data`` should be a ``dict`` object (an
        iterable of ``(key, value)`` pairs is also accepted). The
        element will use the keys and values to populate its
        attributes. You may modify the elements internal dictionary.
        However, this may unintentionally overwrite the attributes
        defined by the ``__setitem__`` method. If you wish to add
        another attribute to the ``Element`` object use the
        convention of adding an underscore at the end of the
        attribute. i.e

        >>> strong = Element('strong')
        >>> strong.message_ = 'An internal message'
        >>> strong['message'] = 'Attribute message'

        """
        Node.__init__(self)
        if data is None:
            data = dict()
        self.__dict__.update(data)
        if isinstance(data, dict):
            # Materialise the keys: ``_order`` is appended to later and
            # ``dict.keys()`` is a read-only view on Python 3.
            self._order = list(data.keys())
        else:
            self._order = list()
            for key, _ in data:
                if key not in self._order:
                    self._order.append(key)
        # Assigned after the update so that an attribute called
        # ``name`` or ``child`` cannot clobber the node's own fields.
        self.name = name
        self.child = list()

    def __call__(self, selector):
        """Return a :class:`lexor.core.selector.Selector` object
        built from `selector` and this element. """
        return LC.Selector(selector, self)

    def update_attributes(self, node):
        """Copies the attributes of the input node into the calling
        node. Existing attributes with the same name are overwritten;
        new names are appended to the attribute order. """
        for k in node:
            self.__dict__[k] = node.__dict__[k]
            if k not in self._order:
                self._order.append(k)

    def __getitem__(self, k):
        """Return the `k`-th child of this node if `k` is an integer.
        Otherwise return the attribute of name with value of `k`.

        A missing attribute raises ``KeyError``. When the element has
        no children a non-string `k` yields ``None``.

        >>> x.__getitem__(k) is x[k]
        True

        """
        if isinstance(k, str):
            return self.__dict__[k]
        if self.child:
            return self.child[k]
        return None

    def get(self, k, val=''):
        """Return the attribute of name with value of `k`, or `val`
        (an empty string by default) when it is not present. """
        return self.__dict__.get(k, val)

    def __setitem__(self, k, val):
        """Overloaded array operator. Appends or modifies an
        attribute. See its base method
        :meth:`lexor.core.node.Node.__setitem__` for documentation on
        when `k` is not a string.

        When the ``id`` attribute is set and the element has an
        owner, the element is registered in the owner's ``id_dict``
        under the new id value so that
        :meth:`Document.get_element_by_id` can find it.

        >>> x.__setitem__(attname, 'att') <==> x[attname] = 'att'

        """
        if isinstance(k, str):
            self.__dict__[k] = val
            if k not in self._order:
                self._order.append(k)
            if k == 'id' and self.owner:
                # Index by the id *value*; that is the key
                # ``get_element_by_id`` looks up.
                self.owner.id_dict[val] = self
        else:
            Node.__setitem__(self, k, val)

    def __delitem__(self, k):
        """Remove a child or attribute.

        A string `k` removes the attribute of that name (``KeyError``
        if it does not exist); anything else is forwarded to
        :meth:`lexor.core.node.Node.__delitem__`.

        >>> x.__delitem__(k) <==> del x[k]

        """
        if isinstance(k, str):
            self.__dict__.__delitem__(k)
            self._order.remove(k)
        else:
            Node.__delitem__(self, k)

    def __contains__(self, obj):
        """Return ``True`` if `obj` is a node and it is a child
        of this element or if `obj` is an attribute of this
        element. Return ``False`` otherwise.

        >>> x.__contains__(obj) == obj in x
        True

        """
        if isinstance(obj, Node):
            # ``child`` is ``None`` for elements that cannot hold
            # children (see ``Void`` and ``RawText``).
            return bool(self.child) and obj in self.child
        return obj in self._order

    def contains(self, obj):
        """Unlike ``__contains__``, this method returns ``True`` if
        `obj` is any of the descendants of the node. """
        if obj.level < self.level + 1:
            return False
        # Climb from `obj` until reaching the depth of our children.
        while obj.level > self.level + 1:
            obj = obj.parent
            if obj is None:
                return False
        return obj in self

    def __iter__(self):
        """Iterate over the element attributes names.

        >>> for attribute_name in node: ...

        """
        for k in self._order:
            yield k

    @property
    def attlen(self):
        """The number of attributes. """
        return len(self._order)

    @property
    def attributes(self):
        """Return a list of the attribute names in the element. The
        list is a copy; modifying it does not affect the element. """
        return list(self._order)

    @property
    def values(self):
        """Return a list of the attribute values in the Element, in
        the same order as ``attributes``. """
        return [self.__dict__[k] for k in self._order]

    def attribute(self, index):
        """Return the name of the attribute at the specified index. """
        return self._order[index]

    def attr(self, index):
        """Return the value of the attribute at the specified index.
        """
        return self.__dict__[self._order[index]]

    def items(self):
        """Return a list of ``(name, value)`` pairs, one for each
        attribute, in attribute order. """
        # ``list`` keeps the Python 2 return type on Python 3 too.
        return list(zip(self._order, self.values))

    def update(self, dict_):
        """Update with the values of `dict_`. Useful when the element
        is empty and you created an Attr object. Then just update the
        values. Each pair goes through ``__setitem__``. """
        for key, val in dict_.items():
            self.__setitem__(key, val)

    def rename(self, old_name, new_name):
        """Renames an attribute. `old_name` may be the attribute name
        or its index. The attribute keeps its position. Renaming an
        attribute to its current name is a no-op; renaming onto the
        name of another existing attribute replaces that attribute.

        >>> from lexor.core.elements import Element
        >>> node = Element('div')
        >>> node['att1'] = 'val1'
        >>> node
        div[0x10a090750 att1="val1"]:
        >>> node.rename('att1', 'new-att-name')
        >>> node
        div[0x10a090750 new-att-name="val1"]:

        """
        if isinstance(old_name, str):
            index = self._order.index(old_name)
        else:
            index = old_name  # Assume old_name is a position
        current = self._order[index]
        if new_name == current:
            # Assign-then-delete on the same key would drop the value.
            return
        if new_name in self._order:
            # Avoid listing the same name twice in ``_order``.
            self._order.remove(new_name)
            index = self._order.index(current)
        self.__dict__[new_name] = self.__dict__.pop(current)
        self._order[index] = new_name

    def clone_node(self, deep=False, normalize=True):
        """Returns a new node. When `deep` is True, it will also
        clone all the child nodes. When `normalize` is true, the
        ``normalize`` method is called on every copied parent once
        its children have been copied.

        The copy is always a plain ``Element`` carrying the same name
        and attributes.

        """
        # May want to provide a node to which the clone will be
        # appended to. If this is done then we will not have to
        # traverse through all the elements of the node to adjust
        # the level of the child nodes when we move the node around
        node = Element(self.name)
        node.update_attributes(self)
        if deep is False or not self.child:
            return node
        # Iterative depth-first walk. ``crt`` is the node being copied
        # and ``crtcopy`` its counterpart in the new tree. Directions:
        # 'd' descends to the first child, 'r' moves to the next
        # sibling, 'u' climbs back towards ``self``.
        crt = self
        crtcopy = node
        direction = 'd'
        while True:
            if direction == 'd':
                crt = crt.child[0]
                clone = crt.clone_node()
                crtcopy.append_child(clone)
            elif direction == 'r':
                if crt.next is None:
                    direction = 'u'
                    continue
                crt = crt.next
                clone = crt.clone_node()
                crtcopy.parent.append_child(clone)
            elif direction == 'u':
                crtcopy = crtcopy.parent
                if normalize:
                    crtcopy.normalize()
                if crt.parent is self:
                    break
                if crt.parent.next is None:
                    crt = crt.parent
                    continue
                crt = crt.parent.next
                clone = crt.clone_node()
                crtcopy.parent.append_child(clone)
            crtcopy = clone
            if crt.child:
                direction = 'd'
            else:
                direction = 'r'
        return node

    def get_elements_by_class_name(self, classname):
        """Return a list of all descendant elements which have all of
        the given class names. `classname` is a whitespace separated
        string of class names. The result is in document order. """
        nodes = []
        if not self.child:
            return nodes
        # ``split()`` without arguments already discards surrounding
        # whitespace, so no extra stripping is needed.
        patterns = set(classname.split())
        # Same 'd' (down) / 'r' (right) / 'u' (up) walk used by
        # ``clone_node``.
        crt = self
        direction = 'd'
        while True:
            if direction == 'd':
                crt = crt.child[0]
            elif direction == 'r':
                if crt.next is None:
                    direction = 'u'
                    continue
                crt = crt.next
            elif direction == 'u':
                if crt.parent is self:
                    break
                if crt.parent.next is None:
                    crt = crt.parent
                    continue
                crt = crt.parent.next
            if isinstance(crt, Element) and 'class' in crt:
                if patterns.issubset(crt['class'].split()):
                    nodes.append(crt)
            if crt.child:
                direction = 'd'
            else:
                direction = 'r'
        return nodes

    def children(self, children=None, **keywords):
        """Set the elements children by providing a list of nodes or
        a string. If using a string then you may provide any of the
        following keywords to dictate how to parse and convert:

        - parser_style: ``'_'``
        - parser_lang: ``'html'``
        - parser_defaults: ``None``,
        - convert_style: ``'_'``,
        - convert_from: ``None``,
        - convert_to: ``'html'``,
        - convert_defaults: ``None``,
        - convert: ``'false'``

        If no children are provided then it returns a string of the
        children written in plain html. To change this behavior
        provide the following keywords:

        - writer_style: ``'plain'``
        - writer_lang: ``'html'``

        In that case every keyword, as well as every entry of the
        owner's ``defaults``, is also copied into the writer defaults
        after being converted to a string and passed through
        ``os.path.expandvars``.

        .. important::

            This requires the installation of lexor styles.

        """
        if children is None:
            lang = keywords.get('writer_lang', 'html')
            style = keywords.get('writer_style', 'plain')
            writer = LC.Writer(lang, style)
            pieces = []
            try:
                owner = self.owner
                if owner is not None and owner.defaults is not None:
                    for var, val in owner.defaults.items():
                        writer.defaults[var] = os.path.expandvars(str(val))
                for var, val in keywords.items():
                    writer.defaults[var] = os.path.expandvars(str(val))
                # ``child`` is ``None`` for childless element types.
                for child in self.child or ():
                    writer.write(child)
                    pieces.append(str(writer))
            finally:
                # Release the writer even if writing a child fails.
                writer.close()
            return ''.join(pieces)
        if isinstance(children, str):
            info = {
                'parser_style': '_',
                'parser_lang': 'html',
                'parser_defaults': None,
                'convert_style': '_',
                'convert_from': None,
                'convert_to': 'html',
                'convert_defaults': None,
                'convert': 'false'
            }
            info.update(keywords)
            parser = LC.Parser(info['parser_lang'],
                               info['parser_style'],
                               info['parser_defaults'])
            parser.parse(children)
            # Note that ``convert`` is compared against the *string*
            # 'true', not the boolean.
            if info['convert'] == 'true' and info['convert_to'] is not None:
                if info['convert_from'] is None:
                    info['convert_from'] = info['parser_lang']
                converter = LC.Converter(info['convert_from'],
                                         info['convert_to'],
                                         info['convert_style'],
                                         info['convert_defaults'])
                converter.convert(parser.doc)
                children = converter.doc.pop()
                converter.log.pop()
            else:
                children = parser.doc
            children.temporary = True
        self.remove_children()
        self.extend_children(children)


class RawText(Element, CharacterData):
    """A few elements do not have children, instead they have data.
    Such elements exist in HTML: ``script``, ``title`` among
    others."""

    def __init__(self, name, data='', att=None):
        """Create an element called `name` holding the string `data`.
        You may provide `att` as a ``dict`` object to populate its
        attributes. """
        # CharacterData first: it stores ``data`` and a placeholder
        # name, which the Element constructor then replaces by `name`.
        CharacterData.__init__(self, data)
        Element.__init__(self, name, att)
        # A raw text element never has child nodes.
        self.child = None

    def clone_node(self, deep=True, normalize=True):
        """Returns a new ``RawText`` element with the same name and
        attributes. The data is copied only when `deep` is ``True``;
        `normalize` is accepted for signature compatibility and is
        not used. """
        node = RawText(self.name)
        node.update_attributes(self)
        if deep is True:
            node.data = self.data
        return node


class Void(Element):
    """An element with no children. Its ``child`` attribute is
    ``None`` instead of a list. """

    def __init__(self, name, att=None):
        """Create a void element called `name`. You may provide `att`
        as a `dict` object to populate its attributes. """
        Element.__init__(self, name, att)
        self.child = None

    def clone_node(self, _=True, normalize=True):
        """Returns a new ``Void`` element with the same name and
        attributes. Both parameters are accepted for signature
        compatibility and are not used. """
        node = Void(self.name)
        node.update_attributes(self)
        return node


class Document(Element):
    """Contains information about the document that it holds.

    A document is its own owner, sits at level ``-1`` and keeps an
    ``id_dict`` mapping that :meth:`get_element_by_id` reads from.

    """

    def __init__(self, lang='xml', style='default'):
        """Creates a new document object and sets its name to
        ``#document``. `lang` and `style` are stored in the ``lang``
        and ``style`` attributes. """
        Element.__init__(self, '#document')
        self.level = -1
        self.owner = self
        self.lang = lang
        self.style = style
        self.uri_ = None
        self.defaults = None
        self.id_dict = dict()
        self.meta = dict()
        self.temporary = True

    def clone_node(self, deep=False, normalize=True):
        """Returns a new Document with the same language, style,
        attributes, uri and a copy of the ``meta`` dictionary. When
        `deep` is True the children are cloned as well. Note: it does
        not copy the default values. """
        node = Document(self.lang, self.style)
        node.update_attributes(self)
        node.uri_ = self.uri_
        node.meta.update(self.meta)
        if deep is False or not self.child:
            return node
        # Clone the subtree into a throw-away Element and hand its
        # children over to the new document.
        clone = Element.clone_node(self, deep, normalize)
        clone.name = ''  # not a document
        node.extend_children(clone)
        return node

    @property
    def language(self):
        """The current document's language. This property is used by
        the writer to determine how to write the document.

        This property is a wrapper for the ``lang`` attribute. """
        return self.lang

    @language.setter
    def language(self, val):
        """Setter function for language. """
        self.lang = val

    @property
    def writing_style(self):
        """The current document's style. This property is used by
        the writer to determine how to write the document.

        This property is a wrapper for the ``style`` attribute.
        """
        return self.style

    @writing_style.setter
    def writing_style(self, val):
        """Setter function for writing_style. """
        self.style = val

    @property
    def uri(self):
        """The Uniform Resource Identifier. This property may become
        useful if the document represents a file. This property
        should be set by a :class:`~lexor.core.parser.Parser`
        object telling us the location of the file that it parsed
        into the Document object. It is read-only; the value lives in
        the ``uri_`` attribute. """
        return self.uri_

    @staticmethod
    def create_element(tagname, data=None):
        """Utility function to avoid having to import
        ``lexor.core.elements`` module. Returns an element object
        named `tagname` with the attributes given in `data`. """
        return Element(tagname, data)

    def get_element_by_id(self, element_id):
        """Return the element registered in ``id_dict`` under
        `element_id`, or None if there is none. """
        return self.id_dict.get(element_id, None)


class DocumentFragment(Document):
    """Takes in an element and "steals" its children. This element
    should only be used as a temporary container. Note that the
    ``__str__`` method may not yield the expected results since all
    the function will do is use the ``__str__`` method in each of its
    children. First assign this object to an actual Document. """

    def __init__(self, lang='xml', style='default'):
        """Create a document and rename it to
        ``#document-fragment``. """
        Document.__init__(self, lang, style)
        self.name = '#document-fragment'

    def append_child(self, new_child):
        """Adds the node new_child to the end of the list of children
        of this node. The children contained in a
        ``DocumentFragment`` only have a parent (the
        ``DocumentFragment``). As opposed as
        :meth:`lexor.core.node.Node.append_child` which also takes
        care of the ``prev`` and ``next`` attributes.

        A string is wrapped in a :class:`.Text` node. Anything else
        that is not a node raises ``TypeError``. A node that already
        has a parent is first removed from it. Returns the appended
        node.

        """
        if isinstance(new_child, str):
            new_child = Text(new_child)
        elif not isinstance(new_child, Node):
            raise TypeError("Only Nodes can be appended.")
        if new_child.parent is not None:
            # Detach the node from its current parent.
            del new_child.parent[new_child.index]
        self.child.append(new_child)
        new_child.parent = self
        new_child.owner = self
        return new_child

    def __repr__(self):
        """Concatenation of the ``repr`` of each child.

        >>> x.__repr__() <==> repr(x)

        """
        return ''.join([repr(node) for node in self.child])

    def __str__(self):
        """Concatenation of the ``str`` of each child.

        >>> x.__str__() <==> str(x)

        """
        return ''.join([str(node) for node in self.child])