Spaces:
No application file
No application file
File size: 1,812 Bytes
a85c9b8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import tempfile
import pytest
from embedchain.loaders.xml import XmlLoader
# Sample document taken from
# https://github.com/langchain-ai/langchain/blob/master/libs/langchain/tests/integration_tests/examples/factbook.xml
# NOTE: a bare "&" is not allowed in XML character data, so it is written as
# the entity "&amp;"; a conforming parser decodes it back to a literal "&".
SAMPLE_XML = """<?xml version="1.0" encoding="UTF-8"?>
<factbook>
<country>
<name>United States</name>
<capital>Washington, DC</capital>
<leader>Joe Biden</leader>
<sport>Baseball</sport>
</country>
<country>
<name>Canada</name>
<capital>Ottawa</capital>
<leader>Justin Trudeau</leader>
<sport>Hockey</sport>
</country>
<country>
<name>France</name>
<capital>Paris</capital>
<leader>Emmanuel Macron</leader>
<sport>Soccer</sport>
</country>
<country>
<name>Trinidad &amp; Tobado</name>
<capital>Port of Spain</capital>
<leader>Keith Rowley</leader>
<sport>Track &amp; Field</sport>
</country>
</factbook>"""
@pytest.mark.parametrize("xml", [SAMPLE_XML])
def test_load_data(xml: str) -> None:
    """
    Test XML loader

    Writes ``xml`` to a temporary file, loads it through ``XmlLoader`` and
    checks that exactly one document is returned, that its content contains
    the expected text of every country, and that its ``url`` metadata equals
    the path of the temporary file.
    """
    # Creating temporary XML file. The document declares UTF-8, so write it as
    # UTF-8 explicitly instead of relying on the platform's default encoding.
    with tempfile.NamedTemporaryFile(mode="w+", encoding="utf-8") as tmpfile:
        tmpfile.write(xml)
        # Flush buffered text to disk: the loader is handed only the file
        # name, so it has to find the complete document on disk.
        tmpfile.flush()
        filename = tmpfile.name

        # Loading XML using XmlLoader. This must stay inside the ``with``
        # block because the temporary file is deleted as soon as it is closed.
        loader = XmlLoader()
        result = loader.load_data(filename)
        data = result["data"]

        # Assertions
        assert len(data) == 1
        assert "United States Washington, DC Joe Biden" in data[0]["content"]
        assert "Canada Ottawa Justin Trudeau" in data[0]["content"]
        assert "France Paris Emmanuel Macron" in data[0]["content"]
        assert "Trinidad & Tobado Port of Spain Keith Rowley" in data[0]["content"]
        assert data[0]["meta_data"]["url"] == filename
|