perplexity-lenses / tests /test_data.py
edugp's picture
Add tests and fix an issue when splitting documents into sentences: take the minimum of the total number of sentences and the sample size, rather than the minimum of the total number of original documents and the sample size
d131aa3
raw history blame
No virus
488 Bytes
import unittest
import pandas as pd
from perplexity_lenses.data import documents_df_to_sentences_df
class TestData(unittest.TestCase):
    """Unit tests for the helpers imported from ``perplexity_lenses.data``."""

    def test_documents_df_to_sentences_df(self):
        """A single document with an embedded newline yields one row per line.

        The requested sample size (100) is larger than the two sentences
        expected back, so both sentences must be present in the result.
        """
        column = "text"
        sample_size = 100

        documents = pd.DataFrame({column: ["foo\nbar"]})
        sentences = documents_df_to_sentences_df(documents, column, sample_size)

        expected = pd.DataFrame({column: ["foo", "bar"]})
        # check_like=True makes the comparison ignore the order of the index
        # and columns, so only the index-label -> value pairing has to match.
        pd.testing.assert_frame_equal(
            sentences,
            expected,
            check_like=True,
            check_exact=True,
        )