"""This file contains test cases reported by third parties using
fuzzing tools, primarily from Google's oss-fuzz project. Some of these
represent real problems with Beautiful Soup, but many are problems in
libraries that Beautiful Soup depends on, and many of the test cases
represent different ways of triggering the same problem.

Grouping these test cases together makes it easy to see which test
cases represent the same problem, and puts the test cases in close
proximity to code that can trigger the problems.
"""
| import os |
| import pytest |
| from bs4 import ( |
| BeautifulSoup, |
| ParserRejectedMarkup, |
| ) |
|
|
class TestFuzz(object):
    """Regression tests driven by fuzzer-reported input files.

    Every test is parametrized over test case filenames. The raw bytes
    of each test case are loaded from the ``fuzz`` directory that sits
    next to this module (see ``__markup``).
    """

    # Suffix shared by the test case files. __markup() appends it to any
    # filename that does not already end with it, so the parametrize
    # lists below can omit it.
    TESTCASE_SUFFIX = ".testcase"

    # Parsing this markup with the 'html.parser' builder must raise
    # ParserRejectedMarkup rather than any other exception.
    @pytest.mark.parametrize(
        "filename", [
            "clusterfuzz-testcase-minimized-bs4_fuzzer-5703933063462912",
        ]
    )
    def test_rejected_markup(self, filename):
        """Verify that the markup is rejected with ParserRejectedMarkup."""
        markup = self.__markup(filename)
        with pytest.raises(ParserRejectedMarkup):
            BeautifulSoup(markup, 'html.parser')

    # These documents must survive a full parse-and-encode round trip
    # with the 'html.parser' builder without raising.
    # NOTE(review): per the test name these are deeply nested documents;
    # the nesting itself is not visible from this file.
    @pytest.mark.parametrize(
        "filename", [
            "clusterfuzz-testcase-minimized-bs4_fuzzer-5984173902397440",
            "clusterfuzz-testcase-minimized-bs4_fuzzer-5167584867909632",
            "clusterfuzz-testcase-minimized-bs4_fuzzer-6124268085182464",
            "clusterfuzz-testcase-minimized-bs4_fuzzer-6450958476902400",
        ]
    )
    def test_deeply_nested_document(self, filename):
        """Parse the markup with html.parser and encode the result.

        The test passes if neither step raises an exception.
        """
        markup = self.__markup(filename)
        BeautifulSoup(markup, 'html.parser').encode()

    # This test is currently skipped (reason: "html5lib problems"). When
    # enabled, it parses each document with the 'html5lib' builder and
    # prints the encoded result.
    # NOTE(review): presumably each of these inputs triggers an error
    # inside html5lib -- confirm before removing the skip marker.
    @pytest.mark.skip("html5lib problems")
    @pytest.mark.parametrize(
        "filename", [
            "clusterfuzz-testcase-minimized-bs4_fuzzer-4818336571064320",
            "clusterfuzz-testcase-minimized-bs4_fuzzer-4999465949331456",
            "clusterfuzz-testcase-minimized-bs4_fuzzer-5843991618256896",
            "clusterfuzz-testcase-minimized-bs4_fuzzer-6241471367348224",
            "clusterfuzz-testcase-minimized-bs4_fuzzer-6600557255327744",
            "crash-0d306a50c8ed8bcd0785b67000fcd5dea1d33f08"
        ]
    )
    def test_html5lib_parse_errors(self, filename):
        """Parse the markup with html5lib and print the encoded output."""
        markup = self.__markup(filename)
        print(BeautifulSoup(markup, 'html5lib').encode())

    def __markup(self, filename):
        """Return the raw bytes of the named fuzz test case.

        :param filename: Name of a file in the ``fuzz`` directory next
            to this module. ``TESTCASE_SUFFIX`` is appended if the name
            does not already end with it.
        :return: The file's contents as a bytestring.
        """
        if not filename.endswith(self.TESTCASE_SUFFIX):
            filename += self.TESTCASE_SUFFIX
        this_dir = os.path.dirname(__file__)
        path = os.path.join(this_dir, 'fuzz', filename)
        # Use a context manager so the file handle is closed promptly
        # instead of being left for the garbage collector.
        with open(path, 'rb') as testcase:
            return testcase.read()
|
|