Bhuvaneshvar
/

cmrit

Joblib

Model card Files Files and versions Community

cmrit / cmrithackathon-master /.venv /lib /python3.11 /site-packages /bs4 /tests /test_navigablestring.py

Bhuvaneshvar

Upload 2116 files

6370773 verified about 2 months ago

raw

history blame

5.08 kB

	import pytest

	from bs4.element import (
	CData,
	Comment,
	Declaration,
	Doctype,
	NavigableString,
	RubyParenthesisString,
	RubyTextString,
	Script,
	Stylesheet,
	TemplateString,
	)

	from . import SoupTest

	class TestNavigableString(SoupTest):
	    """Tests for NavigableString behaviour that mirrors the Tag API."""

	    def test_text_acquisition_methods(self):
	        """Text-gathering methods work on strings as well as on tags.

	        get_text(), .strings, .stripped_strings and _all_strings() are
	        intended for use against Tag, but they work on NavigableString
	        (and its subclasses) as well.
	        """
	        s = NavigableString("fee ")
	        cdata = CData("fie ")
	        comment = Comment("foe ")

	        # A plain NavigableString yields its own text.
	        assert s.get_text() == "fee "
	        assert s.get_text(strip=True) == "fee"
	        assert list(s.strings) == ["fee "]
	        assert list(s.stripped_strings) == ["fee"]
	        assert list(s._all_strings()) == ["fee "]

	        # CData behaves the same way as a plain string here.
	        assert cdata.get_text() == "fie "
	        assert cdata.get_text(strip=True) == "fie"
	        assert list(cdata.strings) == ["fie "]
	        assert list(cdata.stripped_strings) == ["fie"]
	        assert list(cdata._all_strings()) == ["fie "]

	        # Since a Comment isn't normally considered 'text',
	        # these methods generally do nothing.
	        assert comment.get_text() == ""
	        assert list(comment.strings) == []
	        assert list(comment.stripped_strings) == []
	        assert list(comment._all_strings()) == []

	        # Unless you specifically say that comments are okay.
	        assert comment.get_text(strip=True, types=Comment) == "foe"
	        assert comment.get_text(types=(Comment, NavigableString)) == "foe "

	    def test_string_has_immutable_name_property(self):
	        """string.name is defined as None and can't be modified."""
	        string = self.soup("s").string
	        # None is a singleton, so check identity rather than equality.
	        assert string.name is None
	        with pytest.raises(AttributeError):
	            string.name = "foo"

	class TestNavigableStringSubclasses(SoupTest):
	    """Tests for the specialised NavigableString subclasses."""

	    def test_cdata(self):
	        """A hand-built CData node serialises as a CDATA section."""
	        # None of the current builders turn CDATA sections into CData
	        # objects, so the node is created manually and inserted into an
	        # otherwise empty document.
	        document = self.soup("")
	        section = CData("foo")
	        document.insert(1, section)

	        rendered = str(document)
	        assert rendered == "<![CDATA[foo]]>"
	        assert document.find(string="foo") == "foo"
	        assert document.contents[0] == "foo"

	    def test_cdata_is_never_formatted(self):
	        """Text inside a CData object is passed into the formatter.

	        But the return value is ignored.
	        """
	        self.count = 0

	        def counting_formatter(*args):
	            # Record the call, then return text that must NOT show up
	            # in the encoded output.
	            self.count += 1
	            return "BITTER FAILURE"

	        document = self.soup("")
	        document.insert(1, CData("<><><>"))

	        encoded = document.encode(formatter=counting_formatter)
	        assert encoded == b"<![CDATA[<><><>]]>"
	        assert self.count == 1

	    def test_doctype_ends_in_newline(self):
	        """A DOCTYPE is always followed by a newline when encoded."""
	        # This is unlike the other NavigableString subclasses.
	        document = self.soup("")
	        document.insert(1, Doctype("foo"))

	        encoded = document.encode()
	        assert encoded == b"<!DOCTYPE foo>\n"

	    def test_declaration(self):
	        """A Declaration is rendered between '<?' and '?>'."""
	        declaration = Declaration("foo")
	        rendered = declaration.output_ready()
	        assert rendered == "<?foo?>"

	    def test_default_string_containers(self):
	        """The same text gets a different string class in different tags."""
	        tagged_markup = "<div>text</div><script>text</script><style>text</style>"
	        soup = self.soup(tagged_markup)
	        string_classes = [
	            node.__class__ for node in soup.find_all(string=True)
	        ]
	        assert string_classes == [NavigableString, Script, Stylesheet]

	        # The TemplateString is a little unusual because it's generally
	        # found _inside_ children of a <template> element, not a direct
	        # child of the <template> element.
	        template_markup = (
	            "<template>Some text<p>In a tag</p></template>Some text outside"
	        )
	        soup = self.soup(template_markup)
	        for text_node in soup.template._all_strings(types=None):
	            assert isinstance(text_node, TemplateString)

	        # Once the <template> tag closed, we went back to using
	        # NavigableString.
	        after_template = soup.template.next_sibling
	        assert isinstance(after_template, NavigableString)
	        assert not isinstance(after_template, TemplateString)

	        # The TemplateString is also unusual because it can contain
	        # NavigableString subclasses of _other_ types, such as
	        # Comment.
	        markup = b"<template>Some text<p>In a tag</p><!--with a comment--></template>"
	        soup = self.soup(markup)
	        round_tripped = soup.template.encode("utf8")
	        assert round_tripped == markup

	    def test_ruby_strings(self):
	        """Text in <rp> and <rt> tags uses dedicated ruby string classes."""
	        markup = "<ruby>漢 <rp>(</rp><rt>kan</rt><rp>)</rp> 字 <rp>(</rp><rt>ji</rt><rp>)</rp></ruby>"
	        soup = self.soup(markup)

	        parenthesis = soup.rp.string
	        annotation = soup.rt.string
	        assert isinstance(parenthesis, RubyParenthesisString)
	        assert isinstance(annotation, RubyTextString)

	        # Just as a demo, here's what this means for get_text usage:
	        # the ruby strings are left out unless their types are requested.
	        base_text = soup.get_text(strip=True)
	        assert base_text == "漢字"

	        ruby_types = (NavigableString, RubyTextString, RubyParenthesisString)
	        annotated_text = soup.get_text(strip=True, types=ruby_types)
	        assert annotated_text == "漢(kan)字(ji)"