File size: 1,812 Bytes
a85c9b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import tempfile

import pytest

from embedchain.loaders.xml import XmlLoader

# Taken from https://github.com/langchain-ai/langchain/blob/master/libs/langchain/tests/integration_tests/examples/factbook.xml
#
# XML fixture used by test_load_data: a <factbook> root holding four <country>
# records, each with <name>, <capital>, <leader> and <sport> children.
# Two values contain the `&amp;` entity; the test expects it to come back
# decoded as a plain `&`.
# NOTE: the spelling "Tobado" is asserted verbatim in test_load_data, so it
# must not be corrected here without updating that assertion as well.
SAMPLE_XML = """<?xml version="1.0" encoding="UTF-8"?>
<factbook>
  <country>
    <name>United States</name>
    <capital>Washington, DC</capital>
    <leader>Joe Biden</leader>
    <sport>Baseball</sport>
  </country>
  <country>
    <name>Canada</name>
    <capital>Ottawa</capital>
    <leader>Justin Trudeau</leader>
    <sport>Hockey</sport>
  </country>
  <country>
    <name>France</name>
    <capital>Paris</capital>
    <leader>Emmanuel Macron</leader>
    <sport>Soccer</sport>
  </country>
  <country>
    <name>Trinidad &amp; Tobado</name>
    <capital>Port of Spain</capital>
    <leader>Keith Rowley</leader>
    <sport>Track &amp; Field</sport>
  </country>
</factbook>"""


@pytest.mark.parametrize("xml", [SAMPLE_XML])
def test_load_data(xml: str) -> None:
    """
    Test XML loader

    Writes the XML document to a temporary ``.xml`` file, loads it through
    ``XmlLoader.load_data`` and checks that a single document is returned,
    that its content contains the text of every country record, and that
    the ``url`` metadata entry equals the path that was loaded.

    :param xml: XML document text to write to disk and load.
    """
    # The file lives inside a temporary directory and is closed before the
    # loader sees it. Closing guarantees the content is flushed to disk, and
    # it avoids reopening a still-open NamedTemporaryFile by name, which is
    # not possible on every platform (e.g. Windows).
    with tempfile.TemporaryDirectory() as tmpdir:
        with tempfile.NamedTemporaryFile(
            mode="w",
            suffix=".xml",
            dir=tmpdir,
            delete=False,  # removed together with tmpdir on exit
            encoding="utf-8",  # matches the encoding declared in the XML prolog
        ) as tmpfile:
            tmpfile.write(xml)
            filename = tmpfile.name

        # Loading XML using XmlLoader
        loader = XmlLoader()
        result = loader.load_data(filename)
        data = result["data"]

        # Assertions
        assert len(data) == 1
        assert "United States Washington, DC Joe Biden" in data[0]["content"]
        assert "Canada Ottawa Justin Trudeau" in data[0]["content"]
        assert "France Paris Emmanuel Macron" in data[0]["content"]
        assert "Trinidad & Tobado Port of Spain Keith Rowley" in data[0]["content"]
        assert data[0]["meta_data"]["url"] == filename