|
"""Integration code for CSS selectors using Soup Sieve (pypi: soupsieve).""" |
|
|
|
import warnings

# soupsieve is an optional dependency. When it cannot be imported, the
# module-level name is set to None so the rest of this module can detect
# its absence, and a warning is issued once at import time.
try:
    import soupsieve
except ImportError:
    soupsieve = None
    warnings.warn(
        'The soupsieve package is not installed. CSS selectors cannot be used.'
    )
|
|
|
|
|
class CSS(object):
    """A proxy object against the soupsieve library, to simplify its
    CSS selector API.

    Acquire this object through the .css attribute on the
    BeautifulSoup object, or on the Tag you want to use as the
    starting point for a CSS selector.

    The main advantage of doing this is that the tag to be selected
    against doesn't need to be explicitly specified in the function
    calls, since it's already scoped to a tag.
    """

    def __init__(self, tag, api=soupsieve):
        """Constructor.

        You don't need to instantiate this class yourself; instead,
        access the .css attribute on the BeautifulSoup object, or on
        the Tag you want to use as the starting point for your CSS
        selector.

        :param tag: All CSS selectors will use this as their starting
          point.

        :param api: A plug-in replacement for the soupsieve module,
          designed mainly for use in tests.

        :raise NotImplementedError: If `api` is None, which is the
          default when the soupsieve package could not be imported.
        """
        if api is None:
            raise NotImplementedError(
                "Cannot execute CSS selectors because the soupsieve package is not installed."
            )
        self.api = api
        self.tag = tag

    def escape(self, ident):
        """Escape a CSS identifier.

        This is a simple wrapper around soupsieve.escape(). See the
        documentation for that function for more information.

        :param ident: The identifier to escape.

        :return: Whatever the underlying escape() implementation
          returns for `ident`.

        :raise NotImplementedError: If no selector implementation is
          available on this object.
        """
        # Check the implementation this object was actually given, not
        # the module-level import: a replacement `api` must keep working
        # even when the real soupsieve package is not installed.
        if self.api is None:
            raise NotImplementedError(
                "Cannot escape CSS identifiers because the soupsieve package is not installed."
            )
        return self.api.escape(ident)

    def _ns(self, ns, select):
        """Normalize a dictionary of namespaces.

        :param ns: The namespace mapping supplied by the caller, or None.

        :param select: The selector the mapping will be used with;
          either a string or a precompiled api.SoupSieve object.

        :return: `ns` unchanged when it was provided or when `select`
          is precompiled; otherwise the namespaces recorded on the tag.
        """
        if not isinstance(select, self.api.SoupSieve) and ns is None:
            # Only fall back to the tag's namespaces for selectors that
            # are not precompiled. A precompiled selector is left alone:
            # soupsieve documents that namespaces cannot be supplied
            # alongside an already-compiled pattern.
            ns = self.tag._namespaces
        return ns

    def _rs(self, results):
        """Normalize a list of results to a ResultSet.

        A ResultSet is more consistent with the rest of Beautiful
        Soup's API, and ResultSet.__getattr__ has a helpful error
        message if you try to treat a list of results as a single
        result (a common mistake).

        :param results: An iterable of results.

        :return: A ResultSet containing `results`.
        :rtype: bs4.element.ResultSet
        """
        # NOTE(review): imported locally, presumably to avoid a circular
        # import between this module and bs4.element -- confirm.
        from bs4.element import ResultSet
        return ResultSet(None, results)

    def compile(self, select, namespaces=None, flags=0, **kwargs):
        """Pre-compile a selector and return the compiled object.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
           used in the CSS selector to namespace URIs. By default,
           Beautiful Soup will use the prefixes it encountered while
           parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.compile() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
           soupsieve.compile() method.

        :return: A precompiled selector object.
        :rtype: soupsieve.SoupSieve
        """
        return self.api.compile(
            select, self._ns(namespaces, select), flags, **kwargs
        )

    def select_one(self, select, namespaces=None, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag and return the
        first result.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.select_one()
        method.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
           used in the CSS selector to namespace URIs. By default,
           Beautiful Soup will use the prefixes it encountered while
           parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.select_one() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
           soupsieve.select_one() method.

        :return: A Tag, or None if the selector has no match.
        :rtype: bs4.element.Tag
        """
        return self.api.select_one(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def select(self, select, namespaces=None, limit=0, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.select()
        method.

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param limit: After finding this number of results, stop looking.
            None is treated the same as 0.

        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.select() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.select() method.

        :return: A ResultSet of Tag objects.
        :rtype: bs4.element.ResultSet
        """
        if limit is None:
            limit = 0

        return self._rs(
            self.api.select(
                select, self.tag, self._ns(namespaces, select), limit, flags,
                **kwargs
            )
        )

    def iselect(self, select, namespaces=None, limit=0, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.iselect()
        method. It is the same as select(), but it returns a generator
        instead of a list.

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param limit: After finding this number of results, stop looking.
            None is treated the same as 0.

        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.iselect() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.iselect() method.

        :return: A generator
        :rtype: types.GeneratorType
        """
        # select() maps limit=None to 0; do the same here so both entry
        # points accept the same arguments.
        if limit is None:
            limit = 0

        return self.api.iselect(
            select, self.tag, self._ns(namespaces, select), limit, flags,
            **kwargs
        )

    def closest(self, select, namespaces=None, flags=0, **kwargs):
        """Find the Tag closest to this one that matches the given selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.closest()
        method.

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.closest() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.closest() method.

        :return: A Tag, or None if there is no match.
        :rtype: bs4.element.Tag
        """
        return self.api.closest(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def match(self, select, namespaces=None, flags=0, **kwargs):
        """Check whether this Tag matches the given CSS selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.match()
        method.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.match() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.match() method.

        :return: True if this Tag matches the selector; False otherwise.
        :rtype: bool
        """
        return self.api.match(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def filter(self, select, namespaces=None, flags=0, **kwargs):
        """Filter this Tag's direct children based on the given CSS selector.

        This uses the Soup Sieve library. It works the same way as
        passing this Tag into that library's soupsieve.filter()
        method. For more information, see the documentation for
        soupsieve.filter().

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.filter() method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.filter() method.

        :return: A ResultSet of Tag objects.
        :rtype: bs4.element.ResultSet
        """
        return self._rs(
            self.api.filter(
                select, self.tag, self._ns(namespaces, select), flags, **kwargs
            )
        )
|
|