File size: 5,081 Bytes
6370773 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import pytest
from bs4.element import (
CData,
Comment,
Declaration,
Doctype,
NavigableString,
RubyParenthesisString,
RubyTextString,
Script,
Stylesheet,
TemplateString,
)
from . import SoupTest
class TestNavigableString(SoupTest):
    """Tests for methods and properties exercised on NavigableString objects."""

    def test_text_acquisition_methods(self):
        """Check get_text(), strings, stripped_strings and _all_strings() on strings."""
        # These methods are intended for use against Tag, but they
        # work on NavigableString as well.
        s = NavigableString("fee ")
        cdata = CData("fie ")
        comment = Comment("foe ")

        assert "fee " == s.get_text()
        assert "fee" == s.get_text(strip=True)
        assert ["fee "] == list(s.strings)
        assert ["fee"] == list(s.stripped_strings)
        assert ["fee "] == list(s._all_strings())

        assert "fie " == cdata.get_text()
        assert "fie" == cdata.get_text(strip=True)
        assert ["fie "] == list(cdata.strings)
        assert ["fie"] == list(cdata.stripped_strings)
        assert ["fie "] == list(cdata._all_strings())

        # Since a Comment isn't normally considered 'text',
        # these methods generally do nothing.
        assert "" == comment.get_text()
        assert [] == list(comment.strings)
        assert [] == list(comment.stripped_strings)
        assert [] == list(comment._all_strings())

        # Unless you specifically say that comments are okay.
        assert "foe" == comment.get_text(strip=True, types=Comment)
        assert "foe " == comment.get_text(types=(Comment, NavigableString))

    def test_string_has_immutable_name_property(self):
        """Check that a string's name is None and that assigning to it fails."""
        # string.name is defined as None and can't be modified
        string = self.soup("s").string
        # Identity check: the name must be the None singleton itself,
        # not merely something that compares equal to None.
        assert string.name is None
        with pytest.raises(AttributeError):
            string.name = 'foo'
class TestNavigableStringSubclasses(SoupTest):
    """Tests for the specialized subclasses of NavigableString."""

    def test_cdata(self):
        """A manually created CData object can be inserted, rendered and found."""
        # No current builder turns CDATA sections into CData objects,
        # so the object has to be constructed by hand.
        document = self.soup("")
        section = CData("foo")
        document.insert(1, section)

        rendered = str(document)
        assert rendered == "<![CDATA[foo]]>"

        located = document.find(string="foo")
        assert located == "foo"

        first_child = document.contents[0]
        assert first_child == "foo"

    def test_cdata_is_never_formatted(self):
        """The formatter is called for CData text, but its result is discarded."""
        self.count = 0

        def count_call(*_ignored):
            # Record the call, then hand back text that must not
            # appear anywhere in the encoded output.
            self.count += 1
            return "BITTER FAILURE"

        document = self.soup("")
        section = CData("<><><>")
        document.insert(1, section)

        encoded = document.encode(formatter=count_call)
        assert b"<![CDATA[<><><>]]>" == encoded
        assert 1 == self.count

    def test_doctype_ends_in_newline(self):
        """A DOCTYPE, unlike the other subclasses, is output with a trailing newline."""
        document = self.soup("")
        document.insert(1, Doctype("foo"))

        encoded = document.encode()
        assert encoded == b"<!DOCTYPE foo>\n"

    def test_declaration(self):
        """A Declaration is output wrapped in '<?' and '?>'."""
        rendered = Declaration("foo").output_ready()
        assert "<?foo?>" == rendered

    def test_default_string_containers(self):
        """The same text gets a different string class depending on its tag."""
        # Identical text lands in different NavigableString subclasses
        # depending on which tag contains it.
        document = self.soup(
            "<div>text</div><script>text</script><style>text</style>"
        )
        expected_classes = [NavigableString, Script, Stylesheet]
        actual_classes = [
            node.__class__ for node in document.find_all(string=True)
        ]
        assert expected_classes == actual_classes

        # TemplateString is a bit unusual: it usually shows up _inside_
        # children of a <template> element rather than as a direct child
        # of the <template> element itself.
        document = self.soup(
            "<template>Some text<p>In a tag</p></template>Some text outside"
        )
        for text in document.template._all_strings(types=None):
            assert isinstance(text, TemplateString)

        # After the <template> tag is closed, plain NavigableString is
        # used again.
        after_template = document.template.next_sibling
        assert isinstance(after_template, NavigableString)
        assert not isinstance(after_template, TemplateString)

        # TemplateString is also unusual in that it can sit alongside
        # NavigableString subclasses of _other_ types, such as Comment.
        markup = b"<template>Some text<p>In a tag</p><!--with a comment--></template>"
        document = self.soup(markup)
        round_tripped = document.template.encode("utf8")
        assert markup == round_tripped

    def test_ruby_strings(self):
        """Strings inside <rp> and <rt> tags get their own string classes."""
        markup = "<ruby>漢 <rp>(</rp><rt>kan</rt><rp>)</rp> 字 <rp>(</rp><rt>ji</rt><rp>)</rp></ruby>"
        document = self.soup(markup)

        parenthesis = document.rp.string
        annotation = document.rt.string
        assert isinstance(parenthesis, RubyParenthesisString)
        assert isinstance(annotation, RubyTextString)

        # Just as a demo, here's what this means for get_text usage.
        base_text_only = document.get_text(strip=True)
        assert "漢字" == base_text_only

        all_string_types = (
            NavigableString,
            RubyTextString,
            RubyParenthesisString,
        )
        with_annotations = document.get_text(strip=True, types=all_string_types)
        assert "漢(kan)字(ji)" == with_annotations
|