"""Selector
This module is trying to simulate jquery selectors. If some code
looks similar to that of the Sizzle CSS Selector engine it is because
the ideas were taken from it.
In short, credit goes to [Sizzle][1] and CSS for the seletor idea.
[1]: http://sizzlejs.com/
"""
import re
import sys
import types
from datetime import datetime
from time import mktime
from pprint import pprint
LC = sys.modules['lexor.core']
[docs]def get_date():
"""Obtain an integer representation of the date. """
date = datetime.utcnow()
return int(mktime(date.timetuple()))
[docs]def mark_function(fnc):
"""Mark a function for special use by Sizzle. """
fnc.expando = True
return fnc
BOOLEANS = "checked|selected|async|autofocus|autoplay|controls|" + \
"defer|disabled|hidden|ismap|loop|multiple|open|" + \
"readonly|required|scoped"
WHITESPACE = "[\\x20\\t\\r\\n\\f]"
CHAR_ENCODING = "(?:\\\\.|[\\w-]|[^\\x00-\\xa0])+"
IDENTIFIER = CHAR_ENCODING.replace("w", "w#")
ATTRIBUTES = "\\[" + WHITESPACE + "*(" + CHAR_ENCODING + ")" + \
WHITESPACE + "*(?:([*^$|!~]?=)" + WHITESPACE + \
"*(?:(['\"])((?:\\\\.|[^\\\\])*?)\\3|(" + \
IDENTIFIER + ")|)|)" + WHITESPACE + "*\\]"
PSEUDOS = ":(" + CHAR_ENCODING + \
")(?:\\(((['\"])((?:\\\\.|[^\\\\])*?)\\3|((?:\\\\.|" + \
"[^\\\\()[\\]]|" + ATTRIBUTES.replace("3", "8") + \
")*)|.*)\\)|)"
RCOMMA = re.compile("^" + WHITESPACE + "*," + WHITESPACE + "*")
RCOMBINATORS = re.compile("^" + WHITESPACE + "*([>+~]|" +
WHITESPACE + ")" + WHITESPACE + "*")
# Use .findall instead of search or match since this regex had the
# global attribute
RATTRIBUTEQUOTE = re.compile("=" + WHITESPACE + "*([^\\]'\"]*?)" +
WHITESPACE + "*\\]")
RPSEUDO = re.compile(PSEUDOS)
RIDENTIFIER = re.compile("^" + IDENTIFIER + "$")
MATCH_EXPR = {
"ID": re.compile("^#(" + CHAR_ENCODING + ")"),
"CLASS": re.compile("^\\.(" + CHAR_ENCODING + ")"),
"TAG": re.compile("^(" + CHAR_ENCODING.replace("w", "w*") + ")"),
"ATTR": re.compile("^" + ATTRIBUTES),
"PSEUDO": re.compile("^" + PSEUDOS),
"CHILD": re.compile("^:(only|first|last|nth|nth-last)-" +
"(child|of-type)(?:\\(" + WHITESPACE +
"*(even|odd|(([+-]|)(\\d*)n|)" + WHITESPACE +
"*(?:([+-]|)" + WHITESPACE + "*(\\d+)|))" +
WHITESPACE + "*\\)|)", re.IGNORECASE),
"bool": re.compile("^(?:" + BOOLEANS + ")$", re.IGNORECASE),
# For use in libraries implementing .is()
# We use this for POS matching in `select`
"needsContext": re.compile("^" + WHITESPACE +
"*[>+~]|:(even|odd|eq|gt|lt|nth|" +
"first|last)(?:\\(" + WHITESPACE +
"*((?:-\\d)?\\d*)" + WHITESPACE +
"*\\)|)(?=[^-]|$)", re.IGNORECASE),
}
RQUICKEXPR = re.compile(r'^(?:#([\w-]+)|(\w+)|\.([\w-]+))$')
RUNESCAPE = re.compile("\\\\([\\da-f]{1,6}" + WHITESPACE + "?|(" +
WHITESPACE + ")|.)", re.IGNORECASE)
EXPANDO = 'sizzle'+str(get_date())
def _pre_filter_attr(match):
"""function for EXPR['pre_filter']['ATTR']"""
#match[0] = match[0].replace(runescape, funescape);
#match[0] = re.sub(runescape, funescape, match[0])
# Move the given value to match[3] whether quoted or unquoted
match[2] = match[3] or match[4] or ""
#match[2] = match[2].replace(runescape, funescape);
if match[1] == "~=":
match[2] = " " + match[2] + " "
return match[:4]
def _pre_filter_child(match):
"""function for EXPR['pre_filter']['CHILD']"""
match[0] = match[0].lower()
if match[0].slice[:3] == "nth":
# nth-* requires argument
if not match[2]:
print 'ERROR'
sys.exit(2)
# Should raise an error showing match[0]
# numeric x and y parameters for Expr.filter.CHILD
# remember that false/true cast respectively to 0/1
# ML: Possible +=
if match[4]:
match[4] = match[5] + (match[6] or 1)
else:
match[4] = 2*(match[3] == "even" or match[3] == "odd")
match[5] = ((match[7] + match[8]) or match[3] == "odd")
#other types prohibit arguments
elif match[3]:
print 'ERROR'
sys.exit(2)
return match
def _filter_tag(node_name_selector):
name = node_name_selector
if node_name_selector == '*':
return lambda: True
else:
return lambda elem: elem and elem.name.lower() == name
def _filter_class(class_name):
try:
pattern = _filter_class.cache[class_name]
return pattern
except KeyError:
pass
pattern = re.compile("(^|" + WHITESPACE + ")" + class_name + "(" + WHITESPACE + "|$)")
_filter_class.cache[class_name] = lambda elem: pattern.search(elem['class']) if 'class' in elem else None
return _filter_class.cache[class_name]
_filter_class.cache = dict()
EXPR = {
'create_pseudo': mark_function,
'match': MATCH_EXPR,
'attr_handle': {},
'find': {},
'relative': {
'>': {'dir': "parent_node", 'first': True},
' ': {'dir': "parent_node"},
'+': {'dir': "previous_sibling", 'first': True},
'~': {'dir': "previous_sibling"}
},
'pre_filter': {
'ATTR': _pre_filter_attr,
'CHILD': _pre_filter_child,
},
'filter': {
'TAG': _filter_tag,
'CLASS': _filter_class,
}
}
[docs]def clone_obj(obj, parser):
"""Utility function to create deep copies of objects used for the
Selector object. A parser should be given in case the object is a
string. """
try:
return obj.clone_node(True)
except AttributeError:
pass
if hasattr(obj, '__iter__'):
return [clone_obj(ele, parser) for ele in obj]
parser.parse(str(obj))
return parser.doc
[docs]def sizzle(selector, context, results=None, seed=None):
"""Function shamelessly borrowed and partially translated to
python from http://sizzlejs.com/. """
if results is None:
results = list()
if not selector or not isinstance(selector, str):
return results
if not isinstance(context, LC.Element):
return list()
match = RQUICKEXPR.match(selector)
if match is not None: # Shortcuts
match = match.groups()
element_id = match[0]
if element_id: # sizzle('#ID')
if context.name == '#document':
elem = context.get_element_by_id(element_id)
if elem:
results.append(elem)
elif context.owner:
elem = context.owner.get_element_by_id(element_id)
if elem and context.contains(elem):
results.append(elem)
elif match[1]: # sizzle('TAG')
results.extend(context.get_nodes_by_name(selector))
elif isinstance(context, LC.Element): # sizzle('.CLASS')
results.extend(context.get_elements_by_class_name(match[2]))
return results
return select(selector.strip(), context, results, seed)
[docs]def select(selector, context, results, seed):
""" A low-level selection function that works with Sizzle's
compiled selector functions
@param {String|Function} selector A selector or a pre-compiled
selector function built with Sizzle.compile
@param {Element} context
@param {Array} [results]
@param {Array} [seed] A set of elements to match against
"""
#compiled = typeof selector === "function" && selector
#selector = compiled.selector or selector
match = not seed and tokenize(selector)
results = results or list()
if len(match) == 1:
# Other complicated stuff
pass
compile_selector(selector, match)(seed, context, results)
return results
def matcher_from_tokens(tokens):
pass
def matcher_from_group_matchers(element_matchers, set_matchers):
pass
def compile_selector(selector, match=None):
try:
return compile_selector.cache[selector]
except KeyError:
pass
set_matchers = list()
element_matchers = list()
if match is None:
match = tokenize(selector)
i = len(match) - 1
while i:
cached = matcher_from_tokens(match[i])
if cached[EXPANDO]:
set_matchers.append(cached)
else:
element_matchers.append(cached)
cached = matcher_from_group_matchers(element_matchers, set_matchers)
compile_selector.cache[selector] = cached
#cached.selector = selector
return cached
compile_selector.cache = dict()
[docs]def tokenize(selector, parse_only=False):
"""Tokenize..."""
try:
cached = tokenize.cache[selector]
except KeyError:
pass
else:
return 0 if parse_only else cached
so_far = selector
groups = list()
pre_filters = EXPR['pre_filter']
matched = False
while so_far:
match = RCOMMA.match(so_far)
if not matched or match:
if match:
match = match.groups()
so_far = so_far[len(match[0]):] or so_far
tokens = list()
groups.append(tokens)
matched = False
match = RCOMBINATORS.match(so_far)
if match:
matched = match.group(0)
match = list(match.groups())
tokens.append({
'value': matched,
'type': match[0].strip(),
})
so_far = so_far[len(matched):]
for ftype in EXPR['filter']:
match = MATCH_EXPR[ftype].match(so_far)
if match:
matched = match.group(0)
match = list(match.groups())
#match = pre_filters[ftype](match).groups()
tokens.append({
'value': matched,
'type': ftype,
'matches': match,
})
so_far = so_far[len(matched):]
if not matched:
break
if parse_only:
return len(so_far)
else:
tokenize.cache[selector] = groups
return tokenize.cache[selector]
if not hasattr(tokenize, 'cache'):
tokenize.cache = dict()
[docs]class Selector(object):
"""JQuery like object. """
def __init__(self, selector, node, results=None):
self.data = sizzle(selector, node, results)
def __getitem__(self, k):
"""Return the k-th element selected.
x.__getitem__(k) <==> x[k]
"""
return self.data[k]
def __repr__(self):
"""repr method. """
result = '\n----------\n'
for node in self.data:
result += repr(node)
result += '\n----------\n'
return result
[docs] def find(self, selector):
"""Get the descendants of each element in the current set of
matched elements, filtered by a selector. """
current = self.data
self.data = list()
for node in current:
sizzle(selector, node, self.data)
return self
[docs] def contents(self):
"""Get the children of each element in the set of matched
elements, including text and comment nodes."""
current = self.data
self.data = list()
for node in current:
if node:
self.data.extend(node.child)
return self
@staticmethod
def _append(node, content, parser):
"""Helper function to `append` method. """
if isinstance(content, Selector):
node.extend_children(content.data)
elif isinstance(content, LC.Node):
if (content.name in ['#document', '#document-fragment']
and content.temporary):
node.extend_children(content)
else:
node.append_child(content)
elif hasattr(content, '__iter__'):
node.extend_children(content)
else:
parser.parse(str(content))
node.extend_children(parser.doc)
[docs] def append(self, *arg, **keywords):
"""Insert content, specified by the parameter, to the end of
each element in the set of matched elements.
Should behave similarly as https://api.jquery.com/append/.
Major difference is in the function. When passing a function
it should take 2 parameters: node, index. Where node will be
the current element to which the return value will be
appended to. """
info = {
'lang': 'html',
'style': 'default',
'defaults': None,
}
for key in keywords:
info[key] = keywords[key]
parser = LC.Parser(info['lang'], info['style'], info['defaults'])
if len(arg) == 1 and isinstance(arg[0], types.FunctionType):
for num, node in enumerate(self.data):
self._append(node, arg[0](node, num), parser)
else:
for content in arg:
if isinstance(content, str):
parser.parse(content)
content = parser.doc
elif isinstance(content, list):
for num in xrange(len(content)):
if isinstance(content[num], str):
parser.parse(content[num])
content[num] = parser.doc
for i in xrange(len(self.data) - 1):
clone = clone_obj(content, parser)
self._append(self.data[i], clone, parser)
if self.data:
self._append(self.data[-1], content, parser)
@staticmethod
def _prepend(node, content, parser):
"""Helper function to `prepend` method. """
if isinstance(content, Selector):
node.extend_before(0, content.data)
elif isinstance(content, LC.Node):
if (content.name in ['#document', '#document-fragment']
and content.temporary):
node.extend_before(0, content)
else:
node.insert_before(0, content)
elif hasattr(content, '__iter__'):
print 'CONTENT = %r' % content
node.extend_before(0, content)
else:
parser.parse(str(content))
node.extend_before(0, parser.doc)
[docs] def prepend(self, *arg, **keywords):
"""Insert content, specified by the parameter, to the
beginning of each element in the setof matched elements.
Should behave similarly as https://api.jquery.com/append/.
Major difference is in the function. When passing a function
it should take 2 parameters: node, index. Where node will be
the current element to which the return value will be
appended to. """
info = {
'lang': 'html',
'style': 'default',
'defaults': None,
}
for key in keywords:
info[key] = keywords[key]
parser = LC.Parser(info['lang'], info['style'], info['defaults'])
if len(arg) == 1 and isinstance(arg[0], types.FunctionType):
for num, node in enumerate(self.data):
self._prepend(node, arg[0](node, num), parser)
else:
for content in arg:
if isinstance(content, str):
parser.parse(content)
content = parser.doc
elif isinstance(content, list):
for num in xrange(len(content)):
if isinstance(content[num], str):
parser.parse(content[num])
content[num] = parser.doc
for i in xrange(len(self.data) - 1):
clone = clone_obj(content, parser)
self._prepend(self.data[i], clone, parser)
if self.data:
self._prepend(self.data[-1], content, parser)
@staticmethod
def _after(node, content, parser):
"""Helper function to `after` method. """
if isinstance(content, Selector):
node.append_nodes_after(content.data)
elif isinstance(content, LC.Node):
if content.name in ['#document', '#document-fragment']:
node.append_nodes_after(content)
else:
node.append_after(content)
elif hasattr(content, '__iter__'):
node.append_nodes_after(content)
else:
parser.parse(str(content))
node.append_nodes_after(parser.doc)
[docs] def after(self, *arg, **keywords):
"""Insert content, specified by the parameter, after each
element in the set of matched elements.
: .after(content [,content])
:: content
Type: htmlString or Element or Array or jQuery string, Node,
array of Node, or Selector object to insert after each
element in the set of matched elements.
:: content
Type: htmlString or Element or Array or jQuery One or
more additional DOM elements, arrays of elements, HTML
strings, or jQuery objects to insert after each element in
the set of matched elements.
: .after(function(node, index))
:: function(node, index)
A function that returns a string, DOM element(s), or Selector
object to insert after each element in the set of matched
elements. Receives the element in the set and its index
position in the set as its arguments.
: .after(..., lang='html', style='default', 'defaults'=None)
:: lang
The language in which strings will be parsed in.
:: style
The style in which strings will be parsed in.
:: defaults
A dictionary with string keywords and values especifying
options for the particular style.
"""
info = {
'lang': 'html',
'style': 'default',
'defaults': None,
}
for key in keywords:
info[key] = keywords[key]
parser = LC.Parser(info['lang'], info['style'], info['defaults'])
if len(arg) == 1 and isinstance(arg[0], types.FunctionType):
for num, node in enumerate(self.data):
self._after(node, arg[0](node, num), parser)
else:
for content in arg:
if isinstance(content, str):
parser.parse(content)
content = parser.doc
elif isinstance(content, list):
for num in xrange(len(content)):
if isinstance(content[num], str):
parser.parse(content[num])
content[num] = parser.doc
for i in xrange(len(self.data) - 1):
clone = clone_obj(content, parser)
self._after(self.data[i], clone, parser)
if self.data:
self._after(self.data[-1], content, parser)
@staticmethod
def _before(node, content, parser):
"""Helper function to `after` method. """
if isinstance(content, Selector):
node.prepend_nodes_before(content.data)
elif isinstance(content, LC.Node):
if content.name in ['#document', '#document-fragment']:
node.prepend_nodes_before(content)
else:
node.prepend_before(content)
elif hasattr(content, '__iter__'):
node.prepend_nodes_before(content)
else:
parser.parse(str(content))
node.prepend_nodes_before(parser.doc)
[docs] def before(self, *arg, **keywords):
"""Insert content, specified by the parameter, before each
element in the set of matched elements.
: .before(content [,content])
:: content
Type: htmlString or Element or Array or jQuery string, Node,
array of Node, or Selector object to insert before each
element in the set of matched elements.
:: content
Type: htmlString or Element or Array or jQuery One or
more additional DOM elements, arrays of elements, HTML
strings, or jQuery objects to insert before each element in
the set of matched elements.
: .before(function(node, index))
:: function(node, index)
A function that returns a string, DOM element(s), or Selector
object to insert before each element in the set of matched
elements. Receives the element in the set and its index
position in the set as its arguments.
: .before(..., lang='html', style='default', 'defaults'=None)
:: lang
The language in which strings will be parsed in.
:: style
The style in which strings will be parsed in.
:: defaults
A dictionary with string keywords and values especifying
options for the particular style.
"""
info = {
'lang': 'html',
'style': 'default',
'defaults': None,
}
for key in keywords:
info[key] = keywords[key]
parser = LC.Parser(info['lang'], info['style'], info['defaults'])
if len(arg) == 1 and isinstance(arg[0], types.FunctionType):
for num, node in enumerate(self.data):
self._before(node, arg[0](node, num), parser)
else:
for content in arg:
if isinstance(content, str):
parser.parse(content)
content = parser.doc
elif isinstance(content, list):
for num in xrange(len(content)):
if isinstance(content[num], str):
parser.parse(content[num])
content[num] = parser.doc
for i in xrange(len(self.data) - 1):
clone = clone_obj(content, parser)
self._before(self.data[i], clone, parser)
if self.data:
self._before(self.data[-1], content, parser)
def __iter__(self):
for node in self.data:
yield node
def __len__(self):
"""Return the number of elements.
x.__len__() <==> len(x)
"""
return len(self.data)