Spaces:
Build error
Build error
| import pytest | |
| import numpy as np | |
| from src import file_readers | |
| import test_config | |
def test_preprocess_text():
    """
    Check preprocess_text against a stored reference file.

    Runs file_readers.preprocess_text on the open handle of "test.txt" and
    compares the result with the whitespace-stripped lines of
    "test_processed.txt".
    """
    data_dir = test_config.data_path

    # NOTE(review): both fixtures are opened with the platform default
    # encoding -- confirm they decode identically on every platform that
    # runs this suite.
    with open(data_dir / "test.txt", 'r') as raw_file:
        actual = file_readers.preprocess_text(raw_file)

    with open(data_dir / "test_processed.txt", 'r') as reference_file:
        # Iterating the handle yields the same lines readlines() would.
        expected = [row.strip() for row in reference_file]

    assert expected == actual
def test_read_pdf():
    """
    Check the output of read_pdf for "test.pdf".

    Asserts the shape of the returned corpus (wrapped in an object array),
    the shape of its first entry, and the first item of the first and
    third entries.
    """
    source = test_config.data_path / "test.pdf"
    parsed = file_readers.read_pdf(source)

    # dtype=object keeps each top-level entry as a single array element.
    chapters = np.array(parsed, dtype=object)
    assert chapters.shape == (4, )

    first_entry = chapters[0]
    assert np.shape(first_entry) == (3, )

    assert first_entry[0] == 'Lorem Ipsum'
    assert chapters[2][0] == 'Preface'
def test_read_epub():
    """
    Check the output of read_epub for "test.epub".

    Asserts the returned title, the shape of the corpus (wrapped in an
    object array) and of its first entry, and the first item of the first
    and third corpus entries.
    """
    book = test_config.data_path / "test.epub"
    corpus, title = file_readers.read_epub(book)

    expected_title = "the_picture_of_dorian_gray"
    expected_opening = 'The Project Gutenberg eBook of The Picture of Dorian Gray, by Oscar Wilde'

    assert title == expected_title

    # The object array is used only for the shape checks; the content
    # checks below index the corpus exactly as read_epub returned it.
    as_array = np.array(corpus, dtype=object)
    assert as_array.shape == (6,)
    assert np.shape(as_array[0]) == (39,)

    assert corpus[0][0] == expected_opening
    assert corpus[2][0] == 'CHAPTER I.'