import tempfile

import pytest

from embedchain.loaders.xml import XmlLoader

# Taken from https://github.com/langchain-ai/langchain/blob/master/libs/langchain/tests/integration_tests/examples/factbook.xml
# NOTE(review): element names below follow the factbook sample referenced
# above — confirm against that file. The assertions only depend on the text
# content, not on the tag names. Ampersands are escaped as ``&amp;`` so the
# document is well-formed; the parsed text still reads "Trinidad & Tobado".
SAMPLE_XML = """<?xml version="1.0" encoding="UTF-8"?>
<factbook>
  <country>
    <name>United States</name>
    <capital>Washington, DC</capital>
    <leader>Joe Biden</leader>
    <sport>Baseball</sport>
  </country>
  <country>
    <name>Canada</name>
    <capital>Ottawa</capital>
    <leader>Justin Trudeau</leader>
    <sport>Hockey</sport>
  </country>
  <country>
    <name>France</name>
    <capital>Paris</capital>
    <leader>Emmanuel Macron</leader>
    <sport>Soccer</sport>
  </country>
  <country>
    <name>Trinidad &amp; Tobado</name>
    <capital>Port of Spain</capital>
    <leader>Keith Rowley</leader>
    <sport>Track &amp; Field</sport>
  </country>
</factbook>"""


@pytest.mark.parametrize("xml", [SAMPLE_XML])
def test_load_data(xml: str):
    """
    Test XML loader

    Tests that XML file is loaded, metadata is correct and content is correct
    """
    # Creating temporary XML file. Everything below stays inside the ``with``
    # block because the file is deleted as soon as the context exits.
    with tempfile.NamedTemporaryFile(mode="w+", encoding="utf-8") as tmpfile:
        tmpfile.write(xml)
        # Push buffered text to disk so the loader, which reopens the file
        # by name, sees the complete document.
        tmpfile.flush()
        filename = tmpfile.name

        # Loading XML using XmlLoader
        loader = XmlLoader()
        result = loader.load_data(filename)
        data = result["data"]

        # Assertions: the whole document is returned as a single entry whose
        # content holds every country's text and whose metadata records the
        # source path.
        assert len(data) == 1
        content = data[0]["content"]
        assert "United States Washington, DC Joe Biden" in content
        assert "Canada Ottawa Justin Trudeau" in content
        assert "France Paris Emmanuel Macron" in content
        assert "Trinidad & Tobado Port of Spain Keith Rowley" in content
        assert data[0]["meta_data"]["url"] == filename