import unittest

from bs4 import BeautifulSoup

import beautiful_soup


class BeautifulSoupTest(unittest.TestCase):
    """Tests for the helper functions of the local ``beautiful_soup`` module.

    NOTE(review): none of the fixture strings in this class contain HTML tags
    as seen here, although the tests look up ``soup.body`` and a ``main``
    element. The markup may have been lost before review -- confirm the
    fixtures against the version-controlled file.
    """

    def setUp(self):
        """Store the shared fixture text on ``self.html`` before each test."""
        self.html = '''

Paragraph

  • List Item
Text within div
'''

    def test_main_tag(self):
        """Expect ``get_main`` to yield an element named ``main``."""
        soup = BeautifulSoup(self.html, 'html.parser')
        self.assertEqual(beautiful_soup.get_main(soup).name, 'main')

        # Same expectation when the parsed document is an empty string.
        soup = BeautifulSoup("", 'html.parser')
        self.assertEqual(beautiful_soup.get_main(soup).name, 'main')

    def test_has_no_div_childre(self):
        """Parse the fixtures intended for ``has_no_div_children``.

        Every assertion here is commented out, so at present this test only
        verifies that both fixtures can be parsed without raising.
        """
        childless = '''

Text in div.

'''
        soup = BeautifulSoup(childless, 'html.parser')
        # TODO: disabled assertions -- re-enable or delete once confirmed.
        # self.assertFalse( beautiful_soup.has_no_div_children( soup.body ) )
        # self.assertTrue( beautiful_soup.has_no_div_children( soup.body.div ) )

        nested_div = '''
Text in paragraph.
'''
        soup = BeautifulSoup(nested_div, 'html.parser')
        # TODO: disabled assertion -- re-enable or delete once confirmed.
        # self.assertFalse( beautiful_soup.has_no_div_children( soup.body.div ) )

    def test_get_deepest_divs(self):
        """Expect the first result of ``get_deepest_divs`` to hold the text."""
        nested_div = '''

Text in paragraph.

'''
        soup = BeautifulSoup(nested_div, 'html.parser')
        self.assertEqual(
            beautiful_soup.get_deepest_divs(soup.body)[0].text,
            'Text in paragraph.',
        )

    def test_list(self):
        """Call ``get_deepest_divs`` on the list fixture.

        The only assertion is commented out, so this test currently just
        checks that the call does not raise.
        """
        nested_div = '''
'''
        soup = BeautifulSoup(nested_div, 'html.parser')
        divs = beautiful_soup.get_deepest_divs(soup.body)
        # TODO: disabled assertion -- re-enable or delete once confirmed.
        # self.assertEqual( beautiful_soup.get_list_text( divs )[0], 'Text in list.' )

    def test_exlcude_links(self):
        """Expect ``find_direct_text`` to select exactly the wanted elements.

        ``find_direct_text`` is passed to ``find_all`` as the filter; the text
        of the matched elements must equal ``results`` in order and in count.
        """
        nested_div = '''
  • I DONT WANT THIS
  • blablalba I WANT THIS blalba
  • I WANT THIS blalba
  • blablalba I WANT THIS
  • I WANT THIS

    blablalba I WANT THIS blalba

    I WANT THIS blalba

    blablalba I WANT THIS

    '''
        soup = BeautifulSoup(nested_div, 'html.parser')
        list_items = soup.find_all(beautiful_soup.find_direct_text)
        results = [
            'blablalba I WANT THIS blalba',
            'I WANT THIS blalba',
            'blablalba I WANT THIS',
            'I WANT THIS',
            'blablalba I WANT THIS blalba',
            'I WANT THIS blalba',
            'blablalba I WANT THIS',
        ]
        # Compare whole lists rather than indexing results[i] in a loop: a
        # per-item loop passes vacuously when fewer elements are matched and
        # raises IndexError (not a test failure) when more are matched.
        self.assertEqual([item.get_text() for item in list_items], results)


if __name__ == '__main__':
    unittest.main()