import unittest
from bs4 import BeautifulSoup
import beautiful_soup
class BeautifulSoupTest(unittest.TestCase):
    """Tests for the helper functions in the local ``beautiful_soup`` module.

    Every test parses a small fixture with ``BeautifulSoup(..., 'html.parser')``
    and passes the result to a helper.

    NOTE(review): the fixture strings in this class contain no HTML tags as
    written. With ``'html.parser'`` such input yields no ``<body>`` element,
    so ``soup.body`` would be ``None`` -- confirm the intended markup.
    """

    def setUp(self):
        """Store the fixture shared with ``test_main_tag``."""
        self.html = '''
Paragraph
Text within div
'''

    def test_main_tag(self):
        """``get_main`` returns an element named ``main``.

        Checked for both the shared fixture and an empty document.
        """
        soup = BeautifulSoup(self.html, 'html.parser')
        self.assertEqual(beautiful_soup.get_main(soup).name, 'main')

        # An empty document is expected to produce a ``main`` element as well.
        soup = BeautifulSoup("", 'html.parser')
        self.assertEqual(beautiful_soup.get_main(soup).name, 'main')

    def test_has_no_div_childre(self):
        """Parse the fixtures for ``has_no_div_children``.

        All assertions are currently disabled, so this test only verifies
        that parsing the fixtures does not raise.
        """
        childless = '''
'''
        soup = BeautifulSoup(childless, 'html.parser')
        # Disabled assertions, kept to record the intended checks:
        # self.assertFalse( beautiful_soup.has_no_div_children( soup.body ) )
        # self.assertTrue( beautiful_soup.has_no_div_children( soup.body.div ) )

        nested_div = '''
'''
        soup = BeautifulSoup(nested_div, 'html.parser')
        # self.assertFalse( beautiful_soup.has_no_div_children( soup.body.div ) )

    def test_get_deepest_divs(self):
        """The first result of ``get_deepest_divs`` has text 'Text in paragraph.'.

        NOTE(review): the expected text does not occur in the fixture as
        written -- confirm the fixture markup.
        """
        nested_div = '''
'''
        soup = BeautifulSoup(nested_div, 'html.parser')
        deepest = beautiful_soup.get_deepest_divs(soup.body)
        self.assertEqual(deepest[0].text, 'Text in paragraph.')

    def test_list(self):
        """Call ``get_deepest_divs`` on the list fixture.

        The ``get_list_text`` assertion is currently disabled, so this test
        only verifies that ``get_deepest_divs`` does not raise.
        """
        nested_div = '''
'''
        soup = BeautifulSoup(nested_div, 'html.parser')
        divs = beautiful_soup.get_deepest_divs(soup.body)
        # Disabled assertion, kept to record the intended check:
        # self.assertEqual( beautiful_soup.get_list_text( divs )[0], 'Text in list.' )

    def test_exlcude_links(self):
        """``find_all`` filtered by ``find_direct_text`` yields the wanted texts.

        The fixture's 'I DONT WANT THIS' line must not appear among the
        matched elements' texts; the remaining lines must appear in order.
        """
        nested_div = '''
I DONT WANT THIS
blablalba I WANT THIS blalba
I WANT THIS blalba
blablalba I WANT THIS
I WANT THIS
blablalba I WANT THIS blalba
I WANT THIS blalba
blablalba I WANT THIS
'''
        soup = BeautifulSoup(nested_div, 'html.parser')
        list_items = soup.find_all(beautiful_soup.find_direct_text)
        expected = [
            'blablalba I WANT THIS blalba',
            'I WANT THIS blalba',
            'blablalba I WANT THIS',
            'I WANT THIS',
            'blablalba I WANT THIS blalba',
            'I WANT THIS blalba',
            'blablalba I WANT THIS',
        ]
        # Compare whole lists rather than looping over the matches: a
        # per-item loop passes vacuously when nothing is matched and raises
        # IndexError (instead of a clear failure) when too much is matched.
        actual = [item.get_text() for item in list_items]
        self.assertEqual(actual, expected)
# Run the test suite only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    unittest.main()