File size: 5,081 Bytes
6370773
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import pytest

from bs4.element import (
    CData,
    Comment,
    Declaration,
    Doctype,
    NavigableString,
    RubyParenthesisString,
    RubyTextString,
    Script,
    Stylesheet,
    TemplateString,
)

from . import SoupTest

class TestNavigableString(SoupTest):
    """Tests for NavigableString's text-acquisition methods and .name property."""

    def test_text_acquisition_methods(self):
        """Check get_text(), .strings, .stripped_strings and _all_strings()."""
        # These methods are intended for use against Tag, but they
        # work on NavigableString as well.

        s = NavigableString("fee ")
        cdata = CData("fie ")
        comment = Comment("foe ")

        assert "fee " == s.get_text()
        assert "fee" == s.get_text(strip=True)
        assert ["fee "] == list(s.strings)
        assert ["fee"] == list(s.stripped_strings)
        assert ["fee "] == list(s._all_strings())

        assert "fie " == cdata.get_text()
        assert "fie" == cdata.get_text(strip=True)
        assert ["fie "] == list(cdata.strings)
        assert ["fie"] == list(cdata.stripped_strings)
        assert ["fie "] == list(cdata._all_strings())

        # Since a Comment isn't normally considered 'text',
        # these methods generally do nothing.
        assert "" == comment.get_text()
        assert [] == list(comment.strings)
        assert [] == list(comment.stripped_strings)
        assert [] == list(comment._all_strings())

        # Unless you specifically say that comments are okay.
        assert "foe" == comment.get_text(strip=True, types=Comment)
        assert "foe " == comment.get_text(types=(Comment, NavigableString))

    def test_string_has_immutable_name_property(self):
        """string.name is None and assigning to it raises AttributeError."""
        string = self.soup("s").string
        # None is a singleton, so test identity rather than equality.
        assert string.name is None
        with pytest.raises(AttributeError):
            string.name = 'foo'
        # The rejected assignment must leave the property untouched.
        assert string.name is None

class TestNavigableStringSubclasses(SoupTest):
    """Checks for the specialised string containers derived from NavigableString."""

    def test_cdata(self):
        # No builder currently produces CData objects from CDATA
        # sections, so one is built by hand and inserted here.
        document = self.soup("")
        section = CData("foo")
        document.insert(1, section)

        assert "<![CDATA[foo]]>" == str(document)
        assert document.find(string="foo") == "foo"
        first_child = document.contents[0]
        assert first_child == "foo"

    def test_cdata_is_never_formatted(self):
        """The formatter is called with the CData text once.

        Whatever the formatter returns is discarded, so the output
        still contains the original text.
        """

        self.count = 0

        def counting_formatter(*args):
            # Record the call, then return a value that must not
            # show up in the encoded output.
            self.count = self.count + 1
            return "BITTER FAILURE"

        document = self.soup("")
        section = CData("<><><>")
        document.insert(1, section)

        encoded = document.encode(formatter=counting_formatter)
        assert encoded == b"<![CDATA[<><><>]]>"
        assert self.count == 1

    def test_doctype_ends_in_newline(self):
        # A DOCTYPE is the one NavigableString subclass whose output
        # always finishes with a newline.
        document = self.soup("")
        document.insert(1, Doctype("foo"))
        assert b"<!DOCTYPE foo>\n" == document.encode()

    def test_declaration(self):
        declaration = Declaration("foo")
        rendered = declaration.output_ready()
        assert rendered == "<?foo?>"

    def test_default_string_containers(self):
        # The same text can end up in different NavigableString
        # subclasses depending on the tag that contains it.
        document = self.soup(
            "<div>text</div><script>text</script><style>text</style>"
        )
        container_classes = [
            node.__class__ for node in document.find_all(string=True)
        ]
        assert [NavigableString, Script, Stylesheet] == container_classes

        # TemplateString is a little unusual: it is generally found
        # _inside_ children of a <template> element rather than only as a
        # direct child of the <template> element.
        document = self.soup(
            "<template>Some text<p>In a tag</p></template>Some text outside"
        )
        for node in document.template._all_strings(types=None):
            assert isinstance(node, TemplateString)

        # After the <template> tag is closed, plain NavigableString
        # is used again.
        trailing = document.template.next_sibling
        assert isinstance(trailing, NavigableString)
        assert not isinstance(trailing, TemplateString)

        # TemplateString is also unusual in that a <template> may hold
        # NavigableString subclasses of _other_ types, such as Comment.
        source = b"<template>Some text<p>In a tag</p><!--with a comment--></template>"
        document = self.soup(source)
        round_tripped = document.template.encode("utf8")
        assert source == round_tripped

    def test_ruby_strings(self):
        markup = "<ruby>漢 <rp>(</rp><rt>kan</rt><rp>)</rp> 字 <rp>(</rp><rt>ji</rt><rp>)</rp></ruby>"
        document = self.soup(markup)
        assert isinstance(document.rp.string, RubyParenthesisString)
        assert isinstance(document.rt.string, RubyTextString)

        # As a demonstration, this is the effect on get_text usage.
        base_text = document.get_text(strip=True)
        assert base_text == "漢字"

        ruby_types = (NavigableString, RubyTextString, RubyParenthesisString)
        annotated_text = document.get_text(strip=True, types=ruby_types)
        assert annotated_text == "漢(kan)字(ji)"